/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifndef IGB_LEGACY_TX
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.3.10";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static bool     igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, struct tcphdr *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
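
/*
** Usage sketch (added note, not in the original source): rxd/txd are
** boot-time tunables (TUNABLE_INT + CTLFLAG_RDTUN), so they are set
** from /boot/loader.conf, e.g. with purely illustrative values:
**
**   hw.igb.rxd="2048"
**   hw.igb.txd="2048"
**
** Out-of-range or misaligned values are replaced with the defaults
** by the validation code in igb_attach() below.
*/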

/*
** AIM: Adaptive Interrupt Moderation.
** When enabled, the interrupt rate is varied
** over time based on the traffic seen on each
** interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
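
/*
** Usage sketch (illustrative, not in the original): igb_attach()
** mirrors this default into a per-device sysctl, so for an attached
** igb0 it can be flipped at runtime, e.g.:
**
**   sysctl dev.igb.0.enable_aim=0
**
** while hw.igb.enable_aim only changes the default for later attaches.
*/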

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tuneable Interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
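
/*
** For reference (added note): 8000 interrupts/s is one interrupt
** every 125us (1s / 8000); igb_configure_queues() turns this tunable
** into the initial per-vector EITR register setting.
*/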

#if __FreeBSD_version >= 800000
/*
** Tuneable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf and so use no cluster at all. It's a very
** workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");
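
/*
** Usage sketch (illustrative): another boot-time tunable; note the
** tunable name is hw.igb.hdr_split, so hw.igb.hdr_split="1" in
** /boot/loader.conf turns header split on.
*/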

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");
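
/*
** Illustrative example (added note; assumes the adapter supports at
** least as many MSIX queue pairs as there are CPUs): on a 4-CPU
** system, leaving this at 0 yields 4 queues, while
** hw.igb.num_queues="2" in /boot/loader.conf would cap it at 2.
*/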

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded for a given
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  They
         * must not exceed the hardware maximum, and the byte size of the
         * descriptor ring must be a multiple of IGB_DBA_ALIGN.
         */
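        /*
         * Worked example (added note): a legacy TX descriptor is 16
         * bytes, so, assuming IGB_DBA_ALIGN is 128 as defined in
         * if_igb.h, any descriptor count that is a multiple of 8
         * passes the alignment test (e.g. 1024 * 16 = 16384, and
         * 16384 % 128 == 0).
         */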
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
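
        /*
         * E.g. (added note) with the standard 1500-byte MTU this works
         * out to a 1518-byte maximum frame (1500 + 14-byte header +
         * 4-byte FCS) and a 64-byte minimum frame (ETH_ZLEN 60 + FCS).
         */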

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats =
                    (struct e1000_vf_stats *)malloc(sizeof \
                    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats =
                    (struct e1000_hw_stats *)malloc(sizeof \
                    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper)
                        e1000_set_eee_i350(&adapter->hw);
        }

        /*
        ** Start from a known state: this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
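/*
** For example (added, illustrative): with adapter->num_queues == 4, a
** packet carrying flowid 13 is steered to tx ring 13 % 4 == 1 below,
** while packets without an M_FLOWID hint fall back to curcpu % 4.
*/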
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        enq = 0;

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
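                /*
                 * Added note: a 9234-byte max frame allows an MTU of
                 * up to 9216 bytes (9234 - ETHER_HDR_LEN 14 -
                 * ETHER_CRC_LEN 4); larger requests fail with EINVAL.
                 */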
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
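
        /*
         * E.g. (added note) a 9000-byte MTU gives a 9018-byte max
         * frame (9000 + 14 + 4), which lands in the MJUM9BYTES case
         * above; the default 1518-byte frame fits in MCLBYTES clusters.
         */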
1321
1322         /* Prepare receive descriptors and buffers */
1323         if (igb_setup_receive_structures(adapter)) {
1324                 device_printf(dev, "Could not setup receive structures\n");
1325                 return;
1326         }
1327         igb_initialize_receive_units(adapter);
1328
1329         /* Enable VLAN support */
1330         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1331                 igb_setup_vlan_hw_support(adapter);
1332                                 
1333         /* Don't lose promiscuous settings */
1334         igb_set_promisc(adapter);
1335
1336         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1337         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1338
1339         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1340         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1341
1342         if (adapter->msix > 1) /* Set up queue routing */
1343                 igb_configure_queues(adapter);
1344
1345         /* this clears any pending interrupts */
1346         E1000_READ_REG(&adapter->hw, E1000_ICR);
1347 #ifdef DEVICE_POLLING
1348         /*
1349          * Only enable interrupts if we are not polling, make sure
1350          * they are off otherwise.
1351          */
1352         if (ifp->if_capenable & IFCAP_POLLING)
1353                 igb_disable_intr(adapter);
1354         else
1355 #endif /* DEVICE_POLLING */
1356         {
1357                 igb_enable_intr(adapter);
1358                 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1359         }
1360
1361         /* Set Energy Efficient Ethernet */
1362         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1363                 e1000_set_eee_i350(&adapter->hw);
1364 }
1365
1366 static void
1367 igb_init(void *arg)
1368 {
1369         struct adapter *adapter = arg;
1370
1371         IGB_CORE_LOCK(adapter);
1372         igb_init_locked(adapter);
1373         IGB_CORE_UNLOCK(adapter);
1374 }
1375
1376
1377 static void
1378 igb_handle_que(void *context, int pending)
1379 {
1380         struct igb_queue *que = context;
1381         struct adapter *adapter = que->adapter;
1382         struct tx_ring *txr = que->txr;
1383         struct ifnet    *ifp = adapter->ifp;
1384
1385         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1386                 bool    more;
1387
1388                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1389
1390                 IGB_TX_LOCK(txr);
1391                 igb_txeof(txr);
1392 #ifndef IGB_LEGACY_TX
1393                 /* Process the stack queue only if not depleted */
1394                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1395                     !drbr_empty(ifp, txr->br))
1396                         igb_mq_start_locked(ifp, txr);
1397 #else
1398                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1399                         igb_start_locked(txr, ifp);
1400 #endif
1401                 IGB_TX_UNLOCK(txr);
1402                 /* Do we need another? */
1403                 if (more) {
1404                         taskqueue_enqueue(que->tq, &que->que_task);
1405                         return;
1406                 }
1407         }
1408
1409 #ifdef DEVICE_POLLING
1410         if (ifp->if_capenable & IFCAP_POLLING)
1411                 return;
1412 #endif
1413         /* Reenable this interrupt */
1414         if (que->eims)
1415                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1416         else
1417                 igb_enable_intr(adapter);
1418 }
1419
1420 /* Deal with link in a sleepable context */
1421 static void
1422 igb_handle_link(void *context, int pending)
1423 {
1424         struct adapter *adapter = context;
1425
1426         IGB_CORE_LOCK(adapter);
1427         igb_handle_link_locked(adapter);
1428         IGB_CORE_UNLOCK(adapter);
1429 }
1430
1431 static void
1432 igb_handle_link_locked(struct adapter *adapter)
1433 {
1434         struct tx_ring  *txr = adapter->tx_rings;
1435         struct ifnet *ifp = adapter->ifp;
1436
1437         IGB_CORE_LOCK_ASSERT(adapter);
1438         adapter->hw.mac.get_link_status = 1;
1439         igb_update_link_status(adapter);
1440         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1441                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1442                         IGB_TX_LOCK(txr);
1443 #ifndef IGB_LEGACY_TX
1444                         /* Process the stack queue only if not depleted */
1445                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1446                             !drbr_empty(ifp, txr->br))
1447                                 igb_mq_start_locked(ifp, txr);
1448 #else
1449                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1450                                 igb_start_locked(txr, ifp);
1451 #endif
1452                         IGB_TX_UNLOCK(txr);
1453                 }
1454         }
1455 }
1456
1457 /*********************************************************************
1458  *
1459  *  MSI/Legacy Deferred
1460  *  Interrupt Service routine  
1461  *
1462  *********************************************************************/
1463 static int
1464 igb_irq_fast(void *arg)
1465 {
1466         struct adapter          *adapter = arg;
1467         struct igb_queue        *que = adapter->queues;
1468         u32                     reg_icr;
1469
1470
1471         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1472
1473         /* Hot eject?  */
1474         if (reg_icr == 0xffffffff)
1475                 return FILTER_STRAY;
1476
1477         /* Definitely not our interrupt.  */
1478         if (reg_icr == 0x0)
1479                 return FILTER_STRAY;
1480
1481         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1482                 return FILTER_STRAY;
1483
1484         /*
1485          * Mask interrupts until the taskqueue is finished running.  This is
1486          * cheap, just assume that it is needed.  This also works around the
1487          * MSI message reordering errata on certain systems.
1488          */
1489         igb_disable_intr(adapter);
1490         taskqueue_enqueue(que->tq, &que->que_task);
1491
1492         /* Link status change */
1493         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1494                 taskqueue_enqueue(que->tq, &adapter->link_task);
1495
1496         if (reg_icr & E1000_ICR_RXO)
1497                 adapter->rx_overruns++;
1498         return FILTER_HANDLED;
1499 }
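/*
 * Background note (editor's summary, not in the original source):
 * igb_irq_fast() runs as a FreeBSD interrupt filter, so it may not sleep
 * or acquire regular mutexes.  FILTER_STRAY tells the system the
 * interrupt was not ours; FILTER_HANDLED acknowledges it, with the real
 * work deferred to the que_task/link_task taskqueues enqueued above.
 */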
1500
1501 #ifdef DEVICE_POLLING
1502 #if __FreeBSD_version >= 800000
1503 #define POLL_RETURN_COUNT(a) (a)
1504 static int
1505 #else
1506 #define POLL_RETURN_COUNT(a)
1507 static void
1508 #endif
1509 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1510 {
1511         struct adapter          *adapter = ifp->if_softc;
1512         struct igb_queue        *que;
1513         struct tx_ring          *txr;
1514         u32                     reg_icr, rx_done = 0;
1515         u32                     loop = IGB_MAX_LOOP;
1516         bool                    more;
1517
1518         IGB_CORE_LOCK(adapter);
1519         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1520                 IGB_CORE_UNLOCK(adapter);
1521                 return POLL_RETURN_COUNT(rx_done);
1522         }
1523
1524         if (cmd == POLL_AND_CHECK_STATUS) {
1525                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1526                 /* Link status change */
1527                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1528                         igb_handle_link_locked(adapter);
1529
1530                 if (reg_icr & E1000_ICR_RXO)
1531                         adapter->rx_overruns++;
1532         }
1533         IGB_CORE_UNLOCK(adapter);
1534
1535         for (int i = 0; i < adapter->num_queues; i++) {
1536                 que = &adapter->queues[i];
1537                 txr = que->txr;
1538
1539                 igb_rxeof(que, count, &rx_done);
1540
1541                 IGB_TX_LOCK(txr);
1542                 do {
1543                         more = igb_txeof(txr);
1544                 } while (loop-- && more);
1545 #ifndef IGB_LEGACY_TX
1546                 if (!drbr_empty(ifp, txr->br))
1547                         igb_mq_start_locked(ifp, txr);
1548 #else
1549                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1550                         igb_start_locked(txr, ifp);
1551 #endif
1552                 IGB_TX_UNLOCK(txr);
1553         }
1554
1555         return POLL_RETURN_COUNT(rx_done);
1556 }
1557 #endif /* DEVICE_POLLING */
1558
1559 /*********************************************************************
1560  *
1561  *  MSIX Que Interrupt Service routine
1562  *
1563  **********************************************************************/
1564 static void
1565 igb_msix_que(void *arg)
1566 {
1567         struct igb_queue *que = arg;
1568         struct adapter *adapter = que->adapter;
1569         struct ifnet   *ifp = adapter->ifp;
1570         struct tx_ring *txr = que->txr;
1571         struct rx_ring *rxr = que->rxr;
1572         u32             newitr = 0;
1573         bool            more_rx;
1574
1575         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1576         ++que->irqs;
1577
1578         IGB_TX_LOCK(txr);
1579         igb_txeof(txr);
1580 #ifndef IGB_LEGACY_TX
1581         /* Process the stack queue only if not depleted */
1582         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1583             !drbr_empty(ifp, txr->br))
1584                 igb_mq_start_locked(ifp, txr);
1585 #else
1586         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1587                 igb_start_locked(txr, ifp);
1588 #endif
1589         IGB_TX_UNLOCK(txr);
1590
1591         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1592
1593         if (adapter->enable_aim == FALSE)
1594                 goto no_calc;
1595         /*
1596         ** Do Adaptive Interrupt Moderation:
1597         **  - Write out last calculated setting
1598         **  - Calculate based on average size over
1599         **    the last interval.
1600         */
1601         if (que->eitr_setting)
1602                 E1000_WRITE_REG(&adapter->hw,
1603                     E1000_EITR(que->msix), que->eitr_setting);
1604  
1605         que->eitr_setting = 0;
1606
1607         /* Idle, do nothing */
1608         if ((txr->bytes == 0) && (rxr->bytes == 0))
1609                 goto no_calc;
1610                                 
1611         /* Use half the default if sub-gig */
1612         if (adapter->link_speed != 1000)
1613                 newitr = IGB_DEFAULT_ITR / 2;
1614         else {
1615                 if ((txr->bytes) && (txr->packets))
1616                         newitr = txr->bytes/txr->packets;
1617                 if ((rxr->bytes) && (rxr->packets))
1618                         newitr = max(newitr,
1619                             (rxr->bytes / rxr->packets));
1620                 newitr += 24; /* account for hardware frame, crc */
1621                 /* set an upper boundary */
1622                 newitr = min(newitr, 3000);
1623                 /* Be nice to the mid range */
1624                 if ((newitr > 300) && (newitr < 1200))
1625                         newitr = (newitr / 3);
1626                 else
1627                         newitr = (newitr / 2);
1628         }
1629         newitr &= 0x7FFC;  /* Mask invalid bits */
1630         if (adapter->hw.mac.type == e1000_82575)
1631                 newitr |= newitr << 16;
1632         else
1633                 newitr |= E1000_EITR_CNT_IGNR;
1634                  
1635         /* save for next interrupt */
1636         que->eitr_setting = newitr;
1637
1638         /* Reset state */
1639         txr->bytes = 0;
1640         txr->packets = 0;
1641         rxr->bytes = 0;
1642         rxr->packets = 0;
1643
1644 no_calc:
1645         /* Schedule a clean task if needed */
1646         if (more_rx)
1647                 taskqueue_enqueue(que->tq, &que->que_task);
1648         else
1649                 /* Reenable this interrupt */
1650                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1651         return;
1652 }
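/*
 * Worked example of the AIM calculation above (illustrative values, not
 * from the original source): at 1Gbps with txr->bytes = 1500000 and
 * txr->packets = 1000, the average frame is 1500 bytes, so
 * newitr = 1500 + 24 = 1524; that is above the 300-1200 mid range, so
 * newitr = 1524 / 2 = 762, and 762 & 0x7FFC = 760.  A small-frame
 * workload (64-byte average) instead yields (64 + 24) / 2 = 44, i.e. a
 * shorter interval and a higher interrupt rate.  Below 1Gbps the code
 * simply uses IGB_DEFAULT_ITR / 2.
 */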
1653
1654
1655 /*********************************************************************
1656  *
1657  *  MSIX Link Interrupt Service routine
1658  *
1659  **********************************************************************/
1660
1661 static void
1662 igb_msix_link(void *arg)
1663 {
1664         struct adapter  *adapter = arg;
1665         u32             icr;
1666
1667         ++adapter->link_irq;
1668         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1669         if (!(icr & E1000_ICR_LSC))
1670                 goto spurious;
1671         igb_handle_link(adapter, 0);
1672
1673 spurious:
1674         /* Rearm */
1675         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1676         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1677         return;
1678 }
1679
1680
1681 /*********************************************************************
1682  *
1683  *  Media Ioctl callback
1684  *
1685  *  This routine is called whenever the user queries the status of
1686  *  the interface using ifconfig.
1687  *
1688  **********************************************************************/
1689 static void
1690 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1691 {
1692         struct adapter *adapter = ifp->if_softc;
1693
1694         INIT_DEBUGOUT("igb_media_status: begin");
1695
1696         IGB_CORE_LOCK(adapter);
1697         igb_update_link_status(adapter);
1698
1699         ifmr->ifm_status = IFM_AVALID;
1700         ifmr->ifm_active = IFM_ETHER;
1701
1702         if (!adapter->link_active) {
1703                 IGB_CORE_UNLOCK(adapter);
1704                 return;
1705         }
1706
1707         ifmr->ifm_status |= IFM_ACTIVE;
1708
1709         switch (adapter->link_speed) {
1710         case 10:
1711                 ifmr->ifm_active |= IFM_10_T;
1712                 break;
1713         case 100:
1714                 /*
1715                 ** Support for 100Mb SFP - these are Fiber 
1716                 ** but the media type appears as serdes
1717                 */
1718                 if (adapter->hw.phy.media_type ==
1719                     e1000_media_type_internal_serdes)
1720                         ifmr->ifm_active |= IFM_100_FX;
1721                 else
1722                         ifmr->ifm_active |= IFM_100_TX;
1723                 break;
1724         case 1000:
1725                 ifmr->ifm_active |= IFM_1000_T;
1726                 break;
1727         }
1728
1729         if (adapter->link_duplex == FULL_DUPLEX)
1730                 ifmr->ifm_active |= IFM_FDX;
1731         else
1732                 ifmr->ifm_active |= IFM_HDX;
1733
1734         IGB_CORE_UNLOCK(adapter);
1735 }
1736
1737 /*********************************************************************
1738  *
1739  *  Media Ioctl callback
1740  *
1741  *  This routine is called when the user changes speed/duplex using
1742  *  the media/mediaopt options with ifconfig.
1743  *
1744  **********************************************************************/
1745 static int
1746 igb_media_change(struct ifnet *ifp)
1747 {
1748         struct adapter *adapter = ifp->if_softc;
1749         struct ifmedia  *ifm = &adapter->media;
1750
1751         INIT_DEBUGOUT("igb_media_change: begin");
1752
1753         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1754                 return (EINVAL);
1755
1756         IGB_CORE_LOCK(adapter);
1757         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1758         case IFM_AUTO:
1759                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1760                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1761                 break;
1762         case IFM_1000_LX:
1763         case IFM_1000_SX:
1764         case IFM_1000_T:
1765                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1766                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1767                 break;
1768         case IFM_100_TX:
1769                 adapter->hw.mac.autoneg = FALSE;
1770                 adapter->hw.phy.autoneg_advertised = 0;
1771                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1772                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1773                 else
1774                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1775                 break;
1776         case IFM_10_T:
1777                 adapter->hw.mac.autoneg = FALSE;
1778                 adapter->hw.phy.autoneg_advertised = 0;
1779                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1781                 else
1782                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1783                 break;
1784         default:
1785                 device_printf(adapter->dev, "Unsupported media type\n");
1786         }
1787
1788         igb_init_locked(adapter);
1789         IGB_CORE_UNLOCK(adapter);
1790
1791         return (0);
1792 }
1793
1794
1795 /*********************************************************************
1796  *
1797  *  This routine maps the mbufs to Advanced TX descriptors.
1798  *  
1799  **********************************************************************/
1800 static int
1801 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1802 {
1803         struct adapter          *adapter = txr->adapter;
1804         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1805         bus_dmamap_t            map;
1806         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1807         union e1000_adv_tx_desc *txd = NULL;
1808         struct mbuf             *m_head = *m_headp;
1809         struct ether_vlan_header *eh = NULL;
1810         struct ip               *ip = NULL;
1811         struct tcphdr           *th = NULL;
1812         u32                     hdrlen, cmd_type_len, olinfo_status = 0;
1813         int                     ehdrlen, poff;
1814         int                     nsegs, i, first, last = 0;
1815         int                     error, do_tso, remap = 1;
1816
1817         /* Set basic descriptor constants */
1818         cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1819         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1820         if (m_head->m_flags & M_VLANTAG)
1821                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1822
1823 retry:
1824         m_head = *m_headp;
1825         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1826         hdrlen = ehdrlen = poff = 0;
1827
1828         /*
1829          * Intel recommends that the entire IP/TCP header reside in a
1830          * single buffer. If multiple descriptors are used to describe
1831          * the IP and TCP header, each descriptor should describe one
1832          * or more complete headers; descriptors referencing only parts
1833          * of headers are not supported. If all layer headers are not
1834          * coalesced into a single buffer, each buffer should not cross
1835          * a 4KB boundary or be larger than the maximum read request
1836          * size. The controller also requires modifying the IP/TCP
1837          * header to make TSO work, so we first get a writable mbuf
1838          * chain and then coalesce the ethernet/IP/TCP headers into a
1839          * single buffer. This also simplifies IP/TCP/UDP checksum
1840          * offloading, which has similar restrictions.
1841          */
1842         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1843                 if (do_tso || (m_head->m_next != NULL && 
1844                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1845                         if (M_WRITABLE(*m_headp) == 0) {
1846                                 m_head = m_dup(*m_headp, M_NOWAIT);
1847                                 m_freem(*m_headp);
1848                                 if (m_head == NULL) {
1849                                         *m_headp = NULL;
1850                                         return (ENOBUFS);
1851                                 }
1852                                 *m_headp = m_head;
1853                         }
1854                 }
1855                 /*
1856                  * Assume IPv4; we don't have TSO/checksum offload support
1857                  * for IPv6 yet.
1858                  */
1859                 ehdrlen = sizeof(struct ether_header);
1860                 m_head = m_pullup(m_head, ehdrlen);
1861                 if (m_head == NULL) {
1862                         *m_headp = NULL;
1863                         return (ENOBUFS);
1864                 }
1865                 eh = mtod(m_head, struct ether_vlan_header *);
1866                 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1867                         ehdrlen = sizeof(struct ether_vlan_header);
1868                         m_head = m_pullup(m_head, ehdrlen);
1869                         if (m_head == NULL) {
1870                                 *m_headp = NULL;
1871                                 return (ENOBUFS);
1872                         }
1873                 }
1874                 m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1875                 if (m_head == NULL) {
1876                         *m_headp = NULL;
1877                         return (ENOBUFS);
1878                 }
1879                 ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1880                 poff = ehdrlen + (ip->ip_hl << 2);
1881                 if (do_tso) {
1882                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1883                         if (m_head == NULL) {
1884                                 *m_headp = NULL;
1885                                 return (ENOBUFS);
1886                         }
1887                         /*
1888                          * The pseudo TCP checksum does not include the TCP
1889                          * payload length, so the driver must recompute it
1890                          * here as the hardware expects, per Microsoft's
1891                          * Large Send specification.
1892                          */
1893                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1894                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
1895                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1896                         /* Keep track of the full header length */
1897                         hdrlen = poff + (th->th_off << 2);
1898                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1899                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1900                         if (m_head == NULL) {
1901                                 *m_headp = NULL;
1902                                 return (ENOBUFS);
1903                         }
1904                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1905                         m_head = m_pullup(m_head, poff + (th->th_off << 2));
1906                         if (m_head == NULL) {
1907                                 *m_headp = NULL;
1908                                 return (ENOBUFS);
1909                         }
1910                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1911                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1913                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1914                         if (m_head == NULL) {
1915                                 *m_headp = NULL;
1916                                 return (ENOBUFS);
1917                         }
1918                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1919                 }
1920                 *m_headp = m_head;
1921         }
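        /*
         * Worked example of the header parsing above (illustrative, not
         * in the original source): for an untagged IPv4/TCP frame with no
         * IP or TCP options, ehdrlen = 14, poff = 14 + (5 << 2) = 34, and
         * for TSO hdrlen = 34 + (5 << 2) = 54, the classic 54-byte
         * ether/IP/TCP header that must reside in a single buffer.
         */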
1922
1923         /*
1924          * Map the packet for DMA
1925          *
1926          * Capture the first descriptor index,
1927          * this descriptor will have the index
1928          * of the EOP which is the only one that
1929          * now gets a DONE bit writeback.
1930          */
1931         first = txr->next_avail_desc;
1932         tx_buffer = &txr->tx_buffers[first];
1933         tx_buffer_mapped = tx_buffer;
1934         map = tx_buffer->map;
1935
1936         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1937             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1938
1939         /*
1940          * There are two types of errors we can (try) to handle:
1941          * - EFBIG means the mbuf chain was too long and bus_dma ran
1942          *   out of segments.  Defragment the mbuf chain and try again.
1943          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1944          *   at this point in time.  Defer sending and try again later.
1945          * All other errors, in particular EINVAL, are fatal and prevent the
1946          * mbuf chain from ever going through.  Drop it and report error.
1947          */
1948         if (error == EFBIG && remap) {
1949                 struct mbuf *m;
1950
1951                 m = m_defrag(*m_headp, M_NOWAIT);
1952                 if (m == NULL) {
1953                         adapter->mbuf_defrag_failed++;
1954                         m_freem(*m_headp);
1955                         *m_headp = NULL;
1956                         return (ENOBUFS);
1957                 }
1958                 *m_headp = m;
1959
1960                 /* Try it again, but only once */
1961                 remap = 0;
1962                 goto retry;
1963         } else if (error == ENOMEM) {
1964                 adapter->no_tx_dma_setup++;
1965                 return (error);
1966         } else if (error != 0) {
1967                 adapter->no_tx_dma_setup++;
1968                 m_freem(*m_headp);
1969                 *m_headp = NULL;
1970                 return (error);
1971         }
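        /*
         * Note (editor's clarification): the retry above is attempted only
         * once; if bus_dmamap_load_mbuf_sg() returns EFBIG again after
         * m_defrag(), remap is 0, so control reaches the final branch,
         * which frees the chain and reports the error.
         */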
1972
1973         /*
1974         ** Make sure we don't overrun the ring;
1975         ** we need nsegs descriptors and one for
1976         ** the context descriptor used for the
1977         ** offloads.
1978         */
1979         if ((nsegs + 1) > (txr->tx_avail - 2)) {
1980                 txr->no_desc_avail++;
1981                 bus_dmamap_unload(txr->txtag, map);
1982                 return (ENOBUFS);
1983         }
1984         m_head = *m_headp;
1985
1986         /* Do hardware assists:
1987          * Set up the context descriptor, used
1988          * when any hardware offload is done.
1989          * This includes CSUM, VLAN, and TSO.
1990          * It will use the first descriptor.
1991          */
1992
1993         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1994                 if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1995                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1996                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1997                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1998                 } else
1999                         return (ENXIO);
2000         } else if (igb_tx_ctx_setup(txr, m_head))
2001                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2002
2003         /* Calculate payload length */
2004         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2005             << E1000_ADVTXD_PAYLEN_SHIFT);
2006
2007         /* 82575 needs the queue index added */
2008         if (adapter->hw.mac.type == e1000_82575)
2009                 olinfo_status |= txr->me << 4;
2010
2011         /* Set up our transmit descriptors */
2012         i = txr->next_avail_desc;
2013         for (int j = 0; j < nsegs; j++) {
2014                 bus_size_t seg_len;
2015                 bus_addr_t seg_addr;
2016
2017                 tx_buffer = &txr->tx_buffers[i];
2018                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2019                 seg_addr = segs[j].ds_addr;
2020                 seg_len  = segs[j].ds_len;
2021
2022                 txd->read.buffer_addr = htole64(seg_addr);
2023                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2024                 txd->read.olinfo_status = htole32(olinfo_status);
2025                 last = i;
2026                 if (++i == adapter->num_tx_desc)
2027                         i = 0;
2028                 tx_buffer->m_head = NULL;
2029                 tx_buffer->next_eop = -1;
2030         }
2031
2032         txr->next_avail_desc = i;
2033         txr->tx_avail -= nsegs;
2034         tx_buffer->m_head = m_head;
2035
2036         /*
2037         ** Here we swap the maps so the last descriptor,
2038         ** which gets the completion interrupt, has the
2039         ** real map, and the first descriptor gets the
2040         ** unused map from this descriptor.
2041         */
2042         tx_buffer_mapped->map = tx_buffer->map;
2043         tx_buffer->map = map;
2044         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2045
2046         /*
2047          * Last Descriptor of Packet
2048          * needs End Of Packet (EOP)
2049          * and Report Status (RS)
2050          */
2051         txd->read.cmd_type_len |=
2052             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2053         /*
2054          * Keep track in the first buffer which
2055          * descriptor will be written back
2056          */
2057         tx_buffer = &txr->tx_buffers[first];
2058         tx_buffer->next_eop = last;
2059         /* Update the watchdog time early and often */
2060         txr->watchdog_time = ticks;
2061
2062         /*
2063          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2064          * that this frame is available to transmit.
2065          */
2066         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2067             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2068         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2069         ++txr->tx_packets;
2070
2071         return (0);
2072 }
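/*
 * Usage sketch (illustrative, not part of the driver source): igb_xmit()
 * is always called with the ring's TX lock held, roughly:
 *
 *      IGB_TX_LOCK(txr);
 *      error = igb_xmit(txr, &m_head);
 *      IGB_TX_UNLOCK(txr);
 *
 * On failure the chain may already have been freed and *m_headp set to
 * NULL, so callers must re-check m_head before requeueing it.
 */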
2073 static void
2074 igb_set_promisc(struct adapter *adapter)
2075 {
2076         struct ifnet    *ifp = adapter->ifp;
2077         struct e1000_hw *hw = &adapter->hw;
2078         u32             reg;
2079
2080         if (adapter->vf_ifp) {
2081                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2082                 return;
2083         }
2084
2085         reg = E1000_READ_REG(hw, E1000_RCTL);
2086         if (ifp->if_flags & IFF_PROMISC) {
2087                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2088                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2089         } else if (ifp->if_flags & IFF_ALLMULTI) {
2090                 reg |= E1000_RCTL_MPE;
2091                 reg &= ~E1000_RCTL_UPE;
2092                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2093         }
2094 }
2095
2096 static void
2097 igb_disable_promisc(struct adapter *adapter)
2098 {
2099         struct e1000_hw *hw = &adapter->hw;
2100         struct ifnet    *ifp = adapter->ifp;
2101         u32             reg;
2102         int             mcnt = 0;
2103
2104         if (adapter->vf_ifp) {
2105                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2106                 return;
2107         }
2108         reg = E1000_READ_REG(hw, E1000_RCTL);
2109         reg &=  (~E1000_RCTL_UPE);
2110         if (ifp->if_flags & IFF_ALLMULTI)
2111                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2112         else {
2113                 struct  ifmultiaddr *ifma;
2114 #if __FreeBSD_version < 800000
2115                 IF_ADDR_LOCK(ifp);
2116 #else   
2117                 if_maddr_rlock(ifp);
2118 #endif
2119                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2120                         if (ifma->ifma_addr->sa_family != AF_LINK)
2121                                 continue;
2122                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2123                                 break;
2124                         mcnt++;
2125                 }
2126 #if __FreeBSD_version < 800000
2127                 IF_ADDR_UNLOCK(ifp);
2128 #else
2129                 if_maddr_runlock(ifp);
2130 #endif
2131         }
2132         /* Don't disable if in MAX groups */
2133         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2134                 reg &=  (~E1000_RCTL_MPE);
2135         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2136 }
2137
2138
2139 /*********************************************************************
2140  *  Multicast Update
2141  *
2142  *  This routine is called whenever multicast address list is updated.
2143  *
2144  **********************************************************************/
2145
2146 static void
2147 igb_set_multi(struct adapter *adapter)
2148 {
2149         struct ifnet    *ifp = adapter->ifp;
2150         struct ifmultiaddr *ifma;
2151         u32 reg_rctl = 0;
2152         u8  *mta;
2153
2154         int mcnt = 0;
2155
2156         IOCTL_DEBUGOUT("igb_set_multi: begin");
2157
2158         mta = adapter->mta;
2159         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2160             MAX_NUM_MULTICAST_ADDRESSES);
2161
2162 #if __FreeBSD_version < 800000
2163         IF_ADDR_LOCK(ifp);
2164 #else
2165         if_maddr_rlock(ifp);
2166 #endif
2167         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2168                 if (ifma->ifma_addr->sa_family != AF_LINK)
2169                         continue;
2170
2171                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2172                         break;
2173
2174                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2175                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2176                 mcnt++;
2177         }
2178 #if __FreeBSD_version < 800000
2179         IF_ADDR_UNLOCK(ifp);
2180 #else
2181         if_maddr_runlock(ifp);
2182 #endif
2183
2184         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2185                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2186                 reg_rctl |= E1000_RCTL_MPE;
2187                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2188         } else
2189                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2190 }
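/*
 * Note (editor's summary): mta is a flat array of up to
 * MAX_NUM_MULTICAST_ADDRESSES six-byte (ETH_ADDR_LEN) entries; if the
 * list overflows, the code above gives up on exact filtering and sets
 * E1000_RCTL_MPE to accept all multicast traffic instead.
 */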
2191
2192
2193 /*********************************************************************
2194  *  Timer routine:
2195  *      This routine checks for link status,
2196  *      updates statistics, and does the watchdog.
2197  *
2198  **********************************************************************/
2199
2200 static void
2201 igb_local_timer(void *arg)
2202 {
2203         struct adapter          *adapter = arg;
2204         device_t                dev = adapter->dev;
2205         struct ifnet            *ifp = adapter->ifp;
2206         struct tx_ring          *txr = adapter->tx_rings;
2207         struct igb_queue        *que = adapter->queues;
2208         int                     hung = 0, busy = 0;
2209
2210
2211         IGB_CORE_LOCK_ASSERT(adapter);
2212
2213         igb_update_link_status(adapter);
2214         igb_update_stats_counters(adapter);
2215
2216         /*
2217         ** Check the TX queues status
2218         **      - central locked handling of OACTIVE
2219         **      - watchdog only if all queues show hung
2220         */
2221         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2222                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2223                     (adapter->pause_frames == 0))
2224                         ++hung;
2225                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2226                         ++busy;
2227                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2228                         taskqueue_enqueue(que->tq, &que->que_task);
2229         }
2230         if (hung == adapter->num_queues)
2231                 goto timeout;
2232         if (busy == adapter->num_queues)
2233                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2234         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2235             (busy < adapter->num_queues))
2236                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2237
2238         adapter->pause_frames = 0;
2239         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2240 #ifndef DEVICE_POLLING
2241         /* Schedule all queue interrupts - deadlock protection */
2242         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2243 #endif
2244         return;
2245
2246 timeout:
2247         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2248             device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2249             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2250             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2251         device_printf(dev, "TX(%d) desc avail = %d, "
2252             "Next TX to Clean = %d\n",
2253             txr->me, txr->tx_avail, txr->next_to_clean);
2254         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2255         adapter->watchdog_events++;
2256         igb_init_locked(adapter);
2257 }
2258
2259 static void
2260 igb_update_link_status(struct adapter *adapter)
2261 {
2262         struct e1000_hw         *hw = &adapter->hw;
2263         struct e1000_fc_info    *fc = &hw->fc;
2264         struct ifnet            *ifp = adapter->ifp;
2265         device_t                dev = adapter->dev;
2266         struct tx_ring          *txr = adapter->tx_rings;
2267         u32                     link_check, thstat, ctrl;
2268         char                    *flowctl = NULL;
2269
2270         link_check = thstat = ctrl = 0;
2271
2272         /* Get the cached link value or read for real */
2273         switch (hw->phy.media_type) {
2274         case e1000_media_type_copper:
2275                 if (hw->mac.get_link_status) {
2276                         /* Do the work to read phy */
2277                         e1000_check_for_link(hw);
2278                         link_check = !hw->mac.get_link_status;
2279                 } else
2280                         link_check = TRUE;
2281                 break;
2282         case e1000_media_type_fiber:
2283                 e1000_check_for_link(hw);
2284                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2285                                  E1000_STATUS_LU);
2286                 break;
2287         case e1000_media_type_internal_serdes:
2288                 e1000_check_for_link(hw);
2289                 link_check = adapter->hw.mac.serdes_has_link;
2290                 break;
2291         /* VF device is type_unknown */
2292         case e1000_media_type_unknown:
2293                 e1000_check_for_link(hw);
2294                 link_check = !hw->mac.get_link_status;
2295                 /* Fall thru */
2296         default:
2297                 break;
2298         }
2299
2300         /* Check for thermal downshift or shutdown */
2301         if (hw->mac.type == e1000_i350) {
2302                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2303                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2304         }
2305
2306         /* Get the flow control for display */
2307         switch (fc->current_mode) {
2308         case e1000_fc_rx_pause:
2309                 flowctl = "RX";
2310                 break;  
2311         case e1000_fc_tx_pause:
2312                 flowctl = "TX";
2313                 break;  
2314         case e1000_fc_full:
2315                 flowctl = "Full";
2316                 break;  
2317         case e1000_fc_none:
2318         default:
2319                 flowctl = "None";
2320                 break;  
2321         }
2322
2323         /* Now we check if a transition has happened */
2324         if (link_check && (adapter->link_active == 0)) {
2325                 e1000_get_speed_and_duplex(&adapter->hw, 
2326                     &adapter->link_speed, &adapter->link_duplex);
2327                 if (bootverbose)
2328                         device_printf(dev, "Link is up %d Mbps %s,"
2329                             " Flow Control: %s\n",
2330                             adapter->link_speed,
2331                             ((adapter->link_duplex == FULL_DUPLEX) ?
2332                             "Full Duplex" : "Half Duplex"), flowctl);
2333                 adapter->link_active = 1;
2334                 ifp->if_baudrate = adapter->link_speed * 1000000;
2335                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2336                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2337                         device_printf(dev, "Link: thermal downshift\n");
2338                 /* This can sleep */
2339                 if_link_state_change(ifp, LINK_STATE_UP);
2340         } else if (!link_check && (adapter->link_active == 1)) {
2341                 ifp->if_baudrate = adapter->link_speed = 0;
2342                 adapter->link_duplex = 0;
2343                 if (bootverbose)
2344                         device_printf(dev, "Link is Down\n");
2345                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2346                     (thstat & E1000_THSTAT_PWR_DOWN))
2347                         device_printf(dev, "Link: thermal shutdown\n");
2348                 adapter->link_active = 0;
2349                 /* This can sleep */
2350                 if_link_state_change(ifp, LINK_STATE_DOWN);
2351                 /* Reset queue state */
2352                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2353                         txr->queue_status = IGB_QUEUE_IDLE;
2354         }
2355 }
2356
2357 /*********************************************************************
2358  *
2359  *  This routine disables all traffic on the adapter by issuing a
2360  *  global reset on the MAC and deallocates TX/RX buffers.
2361  *
2362  **********************************************************************/
2363
2364 static void
2365 igb_stop(void *arg)
2366 {
2367         struct adapter  *adapter = arg;
2368         struct ifnet    *ifp = adapter->ifp;
2369         struct tx_ring *txr = adapter->tx_rings;
2370
2371         IGB_CORE_LOCK_ASSERT(adapter);
2372
2373         INIT_DEBUGOUT("igb_stop: begin");
2374
2375         igb_disable_intr(adapter);
2376
2377         callout_stop(&adapter->timer);
2378
2379         /* Tell the stack that the interface is no longer active */
2380         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2381         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2382
2383         /* Disarm watchdog timer. */
2384         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2385                 IGB_TX_LOCK(txr);
2386                 txr->queue_status = IGB_QUEUE_IDLE;
2387                 IGB_TX_UNLOCK(txr);
2388         }
2389
2390         e1000_reset_hw(&adapter->hw);
2391         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2392
2393         e1000_led_off(&adapter->hw);
2394         e1000_cleanup_led(&adapter->hw);
2395 }
2396
2397
2398 /*********************************************************************
2399  *
2400  *  Determine hardware revision.
2401  *
2402  **********************************************************************/
2403 static void
2404 igb_identify_hardware(struct adapter *adapter)
2405 {
2406         device_t dev = adapter->dev;
2407
2408         /* Make sure our PCI config space has the necessary stuff set */
2409         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2410         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2411             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2412                 INIT_DEBUGOUT("Memory Access and/or Bus Master "
2413                     "bits were not set!\n");
2414                 adapter->hw.bus.pci_cmd_word |=
2415                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2416                 pci_write_config(dev, PCIR_COMMAND,
2417                     adapter->hw.bus.pci_cmd_word, 2);
2418         }
2419
2420         /* Save off the information about this board */
2421         adapter->hw.vendor_id = pci_get_vendor(dev);
2422         adapter->hw.device_id = pci_get_device(dev);
2423         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2424         adapter->hw.subsystem_vendor_id =
2425             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2426         adapter->hw.subsystem_device_id =
2427             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2428
2429         /* Set MAC type early for PCI setup */
2430         e1000_set_mac_type(&adapter->hw);
2431
2432         /* Are we a VF device? */
2433         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2434             (adapter->hw.mac.type == e1000_vfadapt_i350))
2435                 adapter->vf_ifp = 1;
2436         else
2437                 adapter->vf_ifp = 0;
2438 }
2439
2440 static int
2441 igb_allocate_pci_resources(struct adapter *adapter)
2442 {
2443         device_t        dev = adapter->dev;
2444         int             rid;
2445
2446         rid = PCIR_BAR(0);
2447         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2448             &rid, RF_ACTIVE);
2449         if (adapter->pci_mem == NULL) {
2450                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2451                 return (ENXIO);
2452         }
2453         adapter->osdep.mem_bus_space_tag =
2454             rman_get_bustag(adapter->pci_mem);
2455         adapter->osdep.mem_bus_space_handle =
2456             rman_get_bushandle(adapter->pci_mem);
2457         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2458
2459         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2460
2461         /* This will setup either MSI/X or MSI */
2462         adapter->msix = igb_setup_msix(adapter);
2463         adapter->hw.back = &adapter->osdep;
2464
2465         return (0);
2466 }
2467
2468 /*********************************************************************
2469  *
2470  *  Setup the Legacy or MSI Interrupt handler
2471  *
2472  **********************************************************************/
2473 static int
2474 igb_allocate_legacy(struct adapter *adapter)
2475 {
2476         device_t                dev = adapter->dev;
2477         struct igb_queue        *que = adapter->queues;
2478         int                     error, rid = 0;
2479
2480         /* Turn off all interrupts */
2481         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2482
2483         /* MSI RID is 1 */
2484         if (adapter->msix == 1)
2485                 rid = 1;
2486
2487         /* We allocate a single interrupt resource */
2488         adapter->res = bus_alloc_resource_any(dev,
2489             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2490         if (adapter->res == NULL) {
2491                 device_printf(dev, "Unable to allocate bus resource: "
2492                     "interrupt\n");
2493                 return (ENXIO);
2494         }
2495
2496 #ifndef IGB_LEGACY_TX
2497         TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr);
2498 #endif
2499
2500         /*
2501          * Try allocating a fast interrupt and the associated deferred
2502          * processing contexts.
2503          */
2504         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2505         /* Make tasklet for deferred link handling */
2506         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2507         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2508             taskqueue_thread_enqueue, &que->tq);
2509         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2510             device_get_nameunit(adapter->dev));
2511         if ((error = bus_setup_intr(dev, adapter->res,
2512             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2513             adapter, &adapter->tag)) != 0) {
2514                 device_printf(dev, "Failed to register fast interrupt "
2515                             "handler: %d\n", error);
2516                 taskqueue_free(que->tq);
2517                 que->tq = NULL;
2518                 return (error);
2519         }
2520
2521         return (0);
2522 }
2523
2524
2525 /*********************************************************************
2526  *
2527  *  Setup the MSIX Queue Interrupt handlers: 
2528  *
2529  **********************************************************************/
2530 static int
2531 igb_allocate_msix(struct adapter *adapter)
2532 {
2533         device_t                dev = adapter->dev;
2534         struct igb_queue        *que = adapter->queues;
2535         int                     error, rid, vector = 0;
2536
2537         /* Be sure to start with all interrupts disabled */
2538         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2539         E1000_WRITE_FLUSH(&adapter->hw);
2540
2541         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2542                 rid = vector + 1;
2543                 que->res = bus_alloc_resource_any(dev,
2544                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2545                 if (que->res == NULL) {
2546                         device_printf(dev,
2547                             "Unable to allocate bus resource: "
2548                             "MSIX Queue Interrupt\n");
2549                         return (ENXIO);
2550                 }
2551                 error = bus_setup_intr(dev, que->res,
2552                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2553                     igb_msix_que, que, &que->tag);
2554                 if (error) {
2555                         que->res = NULL;
2556                         device_printf(dev, "Failed to register Queue handler\n");
2557                         return (error);
2558                 }
2559 #if __FreeBSD_version >= 800504
2560                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2561 #endif
2562                 que->msix = vector;
2563                 if (adapter->hw.mac.type == e1000_82575)
2564                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2565                 else
2566                         que->eims = 1 << vector;
2567                 /*
2568                 ** Bind the msix vector, and thus the
2569                 ** rings to the corresponding cpu.
2570                 */
2571                 if (adapter->num_queues > 1) {
2572                         if (igb_last_bind_cpu < 0)
2573                                 igb_last_bind_cpu = CPU_FIRST();
2574                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2575                         device_printf(dev,
2576                                 "Bound queue %d to cpu %d\n",
2577                                 i, igb_last_bind_cpu);
2578                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2579                 }
2580 #ifndef IGB_LEGACY_TX
2581                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2582                     que->txr);
2583 #endif
2584                 /* Make tasklet for deferred handling */
2585                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2586                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2587                     taskqueue_thread_enqueue, &que->tq);
2588                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2589                     device_get_nameunit(adapter->dev));
2590         }
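        /*
         * Illustrative example (assuming CPU_FIRST() returns 0): with four
         * queues the loop above binds the MSI-X vectors round-robin to
         * CPUs 0, 1, 2 and 3 via bus_bind_intr(), so each queue's
         * interrupt lands on its own core.
         */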
2591
2592         /* And Link */
2593         rid = vector + 1;
2594         adapter->res = bus_alloc_resource_any(dev,
2595             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2596         if (adapter->res == NULL) {
2597                 device_printf(dev,
2598                     "Unable to allocate bus resource: "
2599                     "MSIX Link Interrupt\n");
2600                 return (ENXIO);
2601         }
2602         if ((error = bus_setup_intr(dev, adapter->res,
2603             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2604             igb_msix_link, adapter, &adapter->tag)) != 0) {
2605                 device_printf(dev, "Failed to register Link handler\n");
2606                 return (error);
2607         }
2608 #if __FreeBSD_version >= 800504
2609         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2610 #endif
2611         adapter->linkvec = vector;
2612
2613         return (0);
2614 }
2615
2616
2617 static void
2618 igb_configure_queues(struct adapter *adapter)
2619 {
2620         struct  e1000_hw        *hw = &adapter->hw;
2621         struct  igb_queue       *que;
2622         u32                     tmp, ivar = 0, newitr = 0;
2623
2624         /* First turn on RSS capability */
2625         if (adapter->hw.mac.type != e1000_82575)
2626                 E1000_WRITE_REG(hw, E1000_GPIE,
2627                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2628                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2629
2630         /* Turn on MSIX */
2631         switch (adapter->hw.mac.type) {
2632         case e1000_82580:
2633         case e1000_i350:
2634         case e1000_i210:
2635         case e1000_i211:
2636         case e1000_vfadapt:
2637         case e1000_vfadapt_i350:
2638                 /* RX entries */
2639                 for (int i = 0; i < adapter->num_queues; i++) {
2640                         u32 index = i >> 1;
2641                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2642                         que = &adapter->queues[i];
2643                         if (i & 1) {
2644                                 ivar &= 0xFF00FFFF;
2645                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2646                         } else {
2647                                 ivar &= 0xFFFFFF00;
2648                                 ivar |= que->msix | E1000_IVAR_VALID;
2649                         }
2650                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2651                 }
2652                 /* TX entries */
2653                 for (int i = 0; i < adapter->num_queues; i++) {
2654                         u32 index = i >> 1;
2655                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2656                         que = &adapter->queues[i];
2657                         if (i & 1) {
2658                                 ivar &= 0x00FFFFFF;
2659                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2660                         } else {
2661                                 ivar &= 0xFFFF00FF;
2662                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2663                         }
2664                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2665                         adapter->que_mask |= que->eims;
2666                 }
2667
2668                 /* And for the link interrupt */
2669                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2670                 adapter->link_mask = 1 << adapter->linkvec;
2671                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2672                 break;
2673         case e1000_82576:
2674                 /* RX entries */
2675                 for (int i = 0; i < adapter->num_queues; i++) {
2676                         u32 index = i & 0x7; /* Each IVAR has two entries */
2677                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2678                         que = &adapter->queues[i];
2679                         if (i < 8) {
2680                                 ivar &= 0xFFFFFF00;
2681                                 ivar |= que->msix | E1000_IVAR_VALID;
2682                         } else {
2683                                 ivar &= 0xFF00FFFF;
2684                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2685                         }
2686                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2687                         adapter->que_mask |= que->eims;
2688                 }
2689                 /* TX entries */
2690                 for (int i = 0; i < adapter->num_queues; i++) {
2691                         u32 index = i & 0x7; /* Each IVAR has two entries */
2692                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2693                         que = &adapter->queues[i];
2694                         if (i < 8) {
2695                                 ivar &= 0xFFFF00FF;
2696                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2697                         } else {
2698                                 ivar &= 0x00FFFFFF;
2699                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2700                         }
2701                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2702                         adapter->que_mask |= que->eims;
2703                 }
2704
2705                 /* And for the link interrupt */
2706                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2707                 adapter->link_mask = 1 << adapter->linkvec;
2708                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2709                 break;
2710
2711         case e1000_82575:
2712                 /* Enable MSI-X support */
2713                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2714                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2715                 /* Auto-Mask interrupts upon ICR read. */
2716                 tmp |= E1000_CTRL_EXT_EIAME;
2717                 tmp |= E1000_CTRL_EXT_IRCA;
2718                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2719
2720                 /* Queues */
2721                 for (int i = 0; i < adapter->num_queues; i++) {
2722                         que = &adapter->queues[i];
2723                         tmp = E1000_EICR_RX_QUEUE0 << i;
2724                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2725                         que->eims = tmp;
2726                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2727                             i, que->eims);
2728                         adapter->que_mask |= que->eims;
2729                 }
2730
2731                 /* Link */
2732                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2733                     E1000_EIMS_OTHER);
2734                 adapter->link_mask |= E1000_EIMS_OTHER;  /* FALLTHROUGH */
2735         default:
2736                 break;
2737         }
2738
2739         /* Set the starting interrupt rate */
2740         if (igb_max_interrupt_rate > 0)
2741                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2742
2743         if (hw->mac.type == e1000_82575)
2744                 newitr |= newitr << 16;
2745         else
2746                 newitr |= E1000_EITR_CNT_IGNR;
2747
2748         for (int i = 0; i < adapter->num_queues; i++) {
2749                 que = &adapter->queues[i];
2750                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2751         }
2752
2753         return;
2754 }
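/*
 * Worked example of the starting interrupt rate above (illustrative):
 * with the driver's default igb_max_interrupt_rate of 8000, newitr =
 * (4000000 / 8000) & 0x7FFC = 500, which is then written to every
 * queue's EITR register (OR'd with E1000_EITR_CNT_IGNR on non-82575
 * parts, or duplicated into the high half on the 82575).
 */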
2755
2756
2757 static void
2758 igb_free_pci_resources(struct adapter *adapter)
2759 {
2760         struct          igb_queue *que = adapter->queues;
2761         device_t        dev = adapter->dev;
2762         int             rid;
2763
2764         /*
2765         ** There is a slight possibility of a failure mode
2766         ** in attach that will result in entering this function
2767         ** before interrupt resources have been initialized, and
2768         ** in that case we do not want to execute the loops below.
2769         ** We can detect this reliably from the state of the
2770         ** adapter's res pointer.
2771         */
2772         if (adapter->res == NULL)
2773                 goto mem;
2774
2775         /*
2776          * First release all the interrupt resources:
2777          */
2778         for (int i = 0; i < adapter->num_queues; i++, que++) {
2779                 rid = que->msix + 1;
2780                 if (que->tag != NULL) {
2781                         bus_teardown_intr(dev, que->res, que->tag);
2782                         que->tag = NULL;
2783                 }
2784                 if (que->res != NULL)
2785                         bus_release_resource(dev,
2786                             SYS_RES_IRQ, rid, que->res);
2787         }
2788
2789         /* Clean the Legacy or Link interrupt last */
2790         if (adapter->linkvec) /* we are doing MSIX */
2791                 rid = adapter->linkvec + 1;
2792         else
2793                 rid = (adapter->msix != 0) ? 1 : 0;
2794
2795         que = adapter->queues;
2796         if (adapter->tag != NULL) {
2797                 taskqueue_drain(que->tq, &adapter->link_task);
2798                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2799                 adapter->tag = NULL;
2800         }
2801         if (adapter->res != NULL)
2802                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2803
2804         for (int i = 0; i < adapter->num_queues; i++, que++) {
2805                 if (que->tq != NULL) {
2806 #ifndef IGB_LEGACY_TX
2807                         taskqueue_drain(que->tq, &que->txr->txq_task);
2808 #endif
2809                         taskqueue_drain(que->tq, &que->que_task);
2810                         taskqueue_free(que->tq);
2811                 }
2812         }
2813 mem:
2814         if (adapter->msix)
2815                 pci_release_msi(dev);
2816
2817         if (adapter->msix_mem != NULL)
2818                 bus_release_resource(dev, SYS_RES_MEMORY,
2819                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2820
2821         if (adapter->pci_mem != NULL)
2822                 bus_release_resource(dev, SYS_RES_MEMORY,
2823                     PCIR_BAR(0), adapter->pci_mem);
2824
2825 }
2826
2827 /*
2828  * Set up either MSI-X or MSI
2829  */
2830 static int
2831 igb_setup_msix(struct adapter *adapter)
2832 {
2833         device_t dev = adapter->dev;
2834         int rid, want, queues, msgs, maxqueues;
2835
2836         /* tuneable override */
2837         if (igb_enable_msix == 0)
2838                 goto msi;
2839
2840         /* First try MSI/X */
2841         rid = PCIR_BAR(IGB_MSIX_BAR);
2842         adapter->msix_mem = bus_alloc_resource_any(dev,
2843             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2844         if (!adapter->msix_mem) {
2845                 /* May not be enabled */
2846                 device_printf(adapter->dev,
2847                     "Unable to map MSIX table\n");
2848                 goto msi;
2849         }
2850
2851         msgs = pci_msix_count(dev); 
2852         if (msgs == 0) { /* system has msix disabled */
2853                 bus_release_resource(dev, SYS_RES_MEMORY,
2854                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2855                 adapter->msix_mem = NULL;
2856                 goto msi;
2857         }
2858
2859         /* Figure out a reasonable auto config value */
2860         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
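             /*
              * e.g. on a 4-CPU system with 10 MSI-X messages this
              * picks 4 queues, leaving a message free for the link
              * vector requested below.
              */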
2861
2862         /* Manual override */
2863         if (igb_num_queues != 0)
2864                 queues = igb_num_queues;
2865
2866         /* Sanity check based on HW */
2867         switch (adapter->hw.mac.type) {
2868                 case e1000_82575:
2869                         maxqueues = 4;
2870                         break;
2871                 case e1000_82576:
2872                 case e1000_82580:
2873                 case e1000_i350:
2874                         maxqueues = 8;
2875                         break;
2876                 case e1000_i210:
2877                         maxqueues = 4;
2878                         break;
2879                 case e1000_i211:
2880                         maxqueues = 2;
2881                         break;
2882                 default:  /* VF interfaces */
2883                         maxqueues = 1;
2884                         break;
2885         }
2886         if (queues > maxqueues)
2887                 queues = maxqueues;
2888
2889         /*
2890         ** One vector (RX/TX pair) per queue
2891         ** plus an additional one for the link interrupt
2892         */
2893         want = queues + 1;
2894         if (msgs >= want)
2895                 msgs = want;
2896         else {
2897                 device_printf(adapter->dev,
2898                     "MSIX Configuration Problem, "
2899                     "%d vectors available, but %d are needed!\n",
2900                     msgs, want);
2901                 return (0);
2902         }
2903         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2904                 device_printf(adapter->dev,
2905                     "Using MSIX interrupts with %d vectors\n", msgs);
2906                 adapter->num_queues = queues;
2907                 return (msgs);
2908         }
2909 msi:
2910         msgs = pci_msi_count(dev);
2911         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2912                 device_printf(adapter->dev, "Using MSI interrupt\n");
2913                 return (msgs);
2914         }
2915         return (0);
2916 }
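
     /*
      * A sketch of how this path can be steered from /boot/loader.conf,
      * assuming the hw.igb.* tunables registered earlier in this file:
      *
      *   hw.igb.enable_msix=0   # skip MSI-X and fall back to MSI
      *   hw.igb.num_queues=2    # manually cap the queue count
      */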
2917
2918 /*********************************************************************
2919  *
2920  *  Set up a fresh starting state
2921  *
2922  **********************************************************************/
2923 static void
2924 igb_reset(struct adapter *adapter)
2925 {
2926         device_t        dev = adapter->dev;
2927         struct e1000_hw *hw = &adapter->hw;
2928         struct e1000_fc_info *fc = &hw->fc;
2929         struct ifnet    *ifp = adapter->ifp;
2930         u32             pba = 0;
2931         u16             hwm;
2932
2933         INIT_DEBUGOUT("igb_reset: begin");
2934
2935         /* Let the firmware know the OS is in control */
2936         igb_get_hw_control(adapter);
2937
2938         /*
2939          * Packet Buffer Allocation (PBA)
2940          * Writing PBA sets the receive portion of the buffer;
2941          * the remainder is used for the transmit buffer.
2942          */
2943         switch (hw->mac.type) {
2944         case e1000_82575:
2945                 pba = E1000_PBA_32K;
2946                 break;
2947         case e1000_82576:
2948         case e1000_vfadapt:
2949                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2950                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2951                 break;
2952         case e1000_82580:
2953         case e1000_i350:
2954         case e1000_vfadapt_i350:
2955                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2956                 pba = e1000_rxpbs_adjust_82580(pba);
2957                 break;
2958         case e1000_i210:
2959         case e1000_i211:
2960                 pba = E1000_PBA_34K;
                     break;
2961         default:
2962                 break;
2963         }
2964
2965         /* Special needs in case of Jumbo frames */
2966         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2967                 u32 tx_space, min_tx, min_rx;
2968                 pba = E1000_READ_REG(hw, E1000_PBA);
2969                 tx_space = pba >> 16;
2970                 pba &= 0xffff;
2971                 min_tx = (adapter->max_frame_size +
2972                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2973                 min_tx = roundup2(min_tx, 1024);
2974                 min_tx >>= 10;
2975                 min_rx = adapter->max_frame_size;
2976                 min_rx = roundup2(min_rx, 1024);
2977                 min_rx >>= 10;
2978                 if (tx_space < min_tx &&
2979                     ((min_tx - tx_space) < pba)) {
2980                         pba = pba - (min_tx - tx_space);
2981                         /*
2982                          * if short on rx space, rx wins
2983                          * and must trump tx adjustment
2984                          */
2985                         if (pba < min_rx)
2986                                 pba = min_rx;
2987                 }
2988                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2989         }
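
             /*
              * Worked example: with a 9000-byte MTU the max frame is
              * about 9018 bytes, so min_tx rounds up to 18K and min_rx
              * to 9K; space is shifted from RX toward TX, but RX is
              * never allowed to drop below min_rx.
              */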
2990
2991         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2992
2993         /*
2994          * These parameters control the automatic generation (Tx) and
2995          * response (Rx) to Ethernet PAUSE frames.
2996          * - High water mark should allow for at least two frames to be
2997          *   received after sending an XOFF.
2998          * - Low water mark works best when it is very near the high water mark.
2999          *   This allows the receiver to restart by sending XON when it has
3000          *   drained a bit.
3001          */
3002         hwm = min(((pba << 10) * 9 / 10),
3003             ((pba << 10) - 2 * adapter->max_frame_size));
3004
3005         if (hw->mac.type < e1000_82576) {
3006                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3007                 fc->low_water = fc->high_water - 8;
3008         } else {
3009                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3010                 fc->low_water = fc->high_water - 16;
3011         }
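
             /*
              * e.g. an i210 (pba = 34) with 1518-byte frames: hwm =
              * min(34816 * 9 / 10, 34816 - 3036) = 31334, so
              * high_water becomes 31334 & 0xFFF0 = 31328 and
              * low_water 31312.
              */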
3012
3013         fc->pause_time = IGB_FC_PAUSE_TIME;
3014         fc->send_xon = TRUE;
3015         if (adapter->fc)
3016                 fc->requested_mode = adapter->fc;
3017         else
3018                 fc->requested_mode = e1000_fc_default;
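
             /*
              * adapter->fc is set from the per-device "fc" sysctl
              * (assuming the handler registered at attach): 0 = none,
              * 1 = RX pause, 2 = TX pause, 3 = full, matching the
              * e1000_fc_mode values.
              */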
3019
3020         /* Issue a global reset */
3021         e1000_reset_hw(hw);
3022         E1000_WRITE_REG(hw, E1000_WUC, 0);
3023
3024         if (e1000_init_hw(hw) < 0)
3025                 device_printf(dev, "Hardware Initialization Failed\n");
3026
3027         /* Setup DMA Coalescing */
3028         if ((hw->mac.type > e1000_82580) &&
3029             (hw->mac.type != e1000_i211)) {
3030                 u32 dmac;
3031                 u32 reg = ~E1000_DMACR_DMAC_EN;
3032
3033                 if (adapter->dmac == 0) { /* Disabling it */
3034                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
3035                         goto reset_out;
3036                 }
3037
3038                 /* Set starting thresholds */
3039                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3040                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3041
3042                 hwm = 64 * pba - adapter->max_frame_size / 16;
3043                 if (hwm < 64 * (pba - 6))
3044                         hwm = 64 * (pba - 6);
3045                 reg = E1000_READ_REG(hw, E1000_FCRTC);
3046                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3047                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3048                     & E1000_FCRTC_RTH_COAL_MASK);
3049                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3050
3051
3052                 dmac = pba - adapter->max_frame_size / 512;
3053                 if (dmac < pba - 10)
3054                         dmac = pba - 10;
3055                 reg = E1000_READ_REG(hw, E1000_DMACR);
3056                 reg &= ~E1000_DMACR_DMACTHR_MASK;
3057                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3058                     & E1000_DMACR_DMACTHR_MASK);
3059                 /* transition to L0s or L1 if available... */
3060                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3061                 /* timer = value in adapter->dmac in 32usec intervals */
3062                 reg |= (adapter->dmac >> 5);
3063                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3064
3065                 /* Set the interval before transition */
3066                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3067                 reg |= 0x80000004;
3068                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3069
3070                 /* free space in tx packet buffer to wake from DMA coal */
3071                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
3072                     (20480 - (2 * adapter->max_frame_size)) >> 6);
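                     /*
                      * The 20480 above is presumably the 20KB TX packet
                      * buffer of these MACs; the >> 6 expresses the
                      * wakeup threshold in 64-byte units.
                      */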
3073
3074                 /* make low power state decision controlled by DMA coal */
3075                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3076                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3077                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3078                 device_printf(dev, "DMA Coalescing enabled\n");
3079
3080         } else if (hw->mac.type == e1000_82580) {
3081                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3082                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3083                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3084                     reg & ~E1000_PCIEMISC_LX_DECISION);
3085         }
3086
3087 reset_out:
3088         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3089         e1000_get_phy_info(hw);
3090         e1000_check_for_link(hw);
3091         return;
3092 }
3093
3094 /*********************************************************************
3095  *
3096  *  Setup networking device structure and register an interface.
3097  *
3098  **********************************************************************/
3099 static int
3100 igb_setup_interface(device_t dev, struct adapter *adapter)
3101 {
3102         struct ifnet   *ifp;
3103
3104         INIT_DEBUGOUT("igb_setup_interface: begin");
3105
3106         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3107         if (ifp == NULL) {
3108                 device_printf(dev, "can not allocate ifnet structure\n");
3109                 return (-1);
3110         }
3111         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3112         ifp->if_init =  igb_init;
3113         ifp->if_softc = adapter;
3114         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3115         ifp->if_ioctl = igb_ioctl;
3116 #ifndef IGB_LEGACY_TX
3117         ifp->if_transmit = igb_mq_start;
3118         ifp->if_qflush = igb_qflush;
3119 #else
3120         ifp->if_start = igb_start;
3121         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3122         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3123         IFQ_SET_READY(&ifp->if_snd);
3124 #endif
3125
3126         ether_ifattach(ifp, adapter->hw.mac.addr);
3127
3128         ifp->if_capabilities = ifp->if_capenable = 0;
3129
3130         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3131         ifp->if_capabilities |= IFCAP_TSO4;
3132         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3133         ifp->if_capenable = ifp->if_capabilities;
3134
3135         /* Advertise LRO, but leave it off by default (not in capenable) */
3136         ifp->if_capabilities |= IFCAP_LRO;
3137
3138 #ifdef DEVICE_POLLING
3139         ifp->if_capabilities |= IFCAP_POLLING;
3140 #endif
3141
3142         /*
3143          * Tell the upper layer(s) we
3144          * support full VLAN capability.
3145          */
3146         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3147         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3148                              |  IFCAP_VLAN_HWTSO
3149                              |  IFCAP_VLAN_MTU;
3150         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3151                           |  IFCAP_VLAN_HWTSO
3152                           |  IFCAP_VLAN_MTU;
3153
3154         /*
3155         ** Don't turn this on by default: if vlans are
3156         ** created on another pseudo device (e.g. lagg)
3157         ** then vlan events are not passed through, breaking
3158         ** operation, but with HW FILTER off it works. If
3159         ** using vlans directly on the igb driver you can
3160         ** enable this and get full hardware tag filtering.
3161         */
3162         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3163
3164         /*
3165          * Specify the media types supported by this adapter and register
3166          * callbacks to update media and link information
3167          */
3168         ifmedia_init(&adapter->media, IFM_IMASK,
3169             igb_media_change, igb_media_status);
3170         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3171             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3172                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3173                             0, NULL);
3174                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3175         } else {
3176                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3177                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3178                             0, NULL);
3179                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3180                             0, NULL);
3181                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3182                             0, NULL);
3183                 if (adapter->hw.phy.type != e1000_phy_ife) {
3184                         ifmedia_add(&adapter->media,
3185                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3186                         ifmedia_add(&adapter->media,
3187                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3188                 }
3189         }
3190         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3191         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3192         return (0);
3193 }
3194
3195
3196 /*
3197  * Manage DMA'able memory.
3198  */
3199 static void
3200 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3201 {
3202         if (error)
3203                 return;
3204         *(bus_addr_t *) arg = segs[0].ds_addr;
3205 }
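
     /*
      * Note: with BUS_DMA_NOWAIT bus_dmamap_load() invokes this
      * callback synchronously, and because the tag below is created
      * with nsegments = 1, segs[0] describes the whole region.
      */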
3206
3207 static int
3208 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3209         struct igb_dma_alloc *dma, int mapflags)
3210 {
3211         int error;
3212
3213         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3214                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3215                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3216                                 BUS_SPACE_MAXADDR,      /* highaddr */
3217                                 NULL, NULL,             /* filter, filterarg */
3218                                 size,                   /* maxsize */
3219                                 1,                      /* nsegments */
3220                                 size,                   /* maxsegsize */
3221                                 0,                      /* flags */
3222                                 NULL,                   /* lockfunc */
3223                                 NULL,                   /* lockarg */
3224                                 &dma->dma_tag);
3225         if (error) {
3226                 device_printf(adapter->dev,
3227                     "%s: bus_dma_tag_create failed: %d\n",
3228                     __func__, error);
3229                 goto fail_0;
3230         }
3231
3232         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3233             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3234         if (error) {
3235                 device_printf(adapter->dev,
3236                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3237                     __func__, (uintmax_t)size, error);
3238                 goto fail_2;
3239         }
3240
3241         dma->dma_paddr = 0;
3242         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3243             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3244         if (error || dma->dma_paddr == 0) {
3245                 device_printf(adapter->dev,
3246                     "%s: bus_dmamap_load failed: %d\n",
3247                     __func__, error);
3248                 goto fail_3;
3249         }
3250
3251         return (0);
3252
3253 fail_3:
3254         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3255 fail_2:
3256         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3257         bus_dma_tag_destroy(dma->dma_tag);
3258 fail_0:
3259         dma->dma_map = NULL;
3260         dma->dma_tag = NULL;
3261
3262         return (error);
3263 }
3264
3265 static void
3266 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3267 {
3268         if (dma->dma_tag == NULL)
3269                 return;
3270         if (dma->dma_map != NULL) {
3271                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3272                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3273                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3274                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3275                 dma->dma_map = NULL;
3276         }
3277         bus_dma_tag_destroy(dma->dma_tag);
3278         dma->dma_tag = NULL;
3279 }
3280
3281
3282 /*********************************************************************
3283  *
3284  *  Allocate memory for the transmit and receive rings, and then
3285  *  the descriptors associated with each, called only once at attach.
3286  *
3287  **********************************************************************/
3288 static int
3289 igb_allocate_queues(struct adapter *adapter)
3290 {
3291         device_t dev = adapter->dev;
3292         struct igb_queue        *que = NULL;
3293         struct tx_ring          *txr = NULL;
3294         struct rx_ring          *rxr = NULL;
3295         int rsize, tsize, error = E1000_SUCCESS;
3296         int txconf = 0, rxconf = 0;
3297
3298         /* First allocate the top level queue structs */
3299         if (!(adapter->queues =
3300             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3301             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3302                 device_printf(dev, "Unable to allocate queue memory\n");
3303                 error = ENOMEM;
3304                 goto fail;
3305         }
3306
3307         /* Next allocate the TX ring struct memory */
3308         if (!(adapter->tx_rings =
3309             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3310             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3311                 device_printf(dev, "Unable to allocate TX ring memory\n");
3312                 error = ENOMEM;
3313                 goto tx_fail;
3314         }
3315
3316         /* Now allocate the RX */
3317         if (!(adapter->rx_rings =
3318             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3319             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320                 device_printf(dev, "Unable to allocate RX ring memory\n");
3321                 error = ENOMEM;
3322                 goto rx_fail;
3323         }
3324
3325         tsize = roundup2(adapter->num_tx_desc *
3326             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
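             /*
              * e.g. with the default 1024 TX descriptors at 16 bytes
              * each this is 16KB, already a multiple of IGB_DBA_ALIGN.
              */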
3327         /*
3328          * Now set up the TX queues, txconf is needed to handle the
3329          * possibility that things fail midcourse and we need to
3330          * undo memory gracefully
3331          */ 
3332         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3333                 /* Set up some basics */
3334                 txr = &adapter->tx_rings[i];
3335                 txr->adapter = adapter;
3336                 txr->me = i;
3337
3338                 /* Initialize the TX lock */
3339                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3340                     device_get_nameunit(dev), txr->me);
3341                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3342
3343                 if (igb_dma_malloc(adapter, tsize,
3344                         &txr->txdma, BUS_DMA_NOWAIT)) {
3345                         device_printf(dev,
3346                             "Unable to allocate TX Descriptor memory\n");
3347                         error = ENOMEM;
3348                         goto err_tx_desc;
3349                 }
3350                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3351                 bzero((void *)txr->tx_base, tsize);
3352
3353                 /* Now allocate transmit buffers for the ring */
3354                 if (igb_allocate_transmit_buffers(txr)) {
3355                         device_printf(dev,
3356                             "Critical Failure setting up transmit buffers\n");
3357                         error = ENOMEM;
3358                         goto err_tx_desc;
3359                 }
3360 #ifndef IGB_LEGACY_TX
3361                 /* Allocate a buf ring */
3362                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3363                     M_WAITOK, &txr->tx_mtx);
3364 #endif
3365         }
3366
3367         /*
3368          * Next the RX queues...
3369          */ 
3370         rsize = roundup2(adapter->num_rx_desc *
3371             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3372         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3373                 rxr = &adapter->rx_rings[i];
3374                 rxr->adapter = adapter;
3375                 rxr->me = i;
3376
3377                 /* Initialize the RX lock */
3378                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3379                     device_get_nameunit(dev), rxr->me);
3380                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3381
3382                 if (igb_dma_malloc(adapter, rsize,
3383                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3384                         device_printf(dev,
3385                             "Unable to allocate RX Descriptor memory\n");
3386                         error = ENOMEM;
3387                         goto err_rx_desc;
3388                 }
3389                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3390                 bzero((void *)rxr->rx_base, rsize);
3391
3392                 /* Allocate receive buffers for the ring*/
3393                 if (igb_allocate_receive_buffers(rxr)) {
3394                         device_printf(dev,
3395                             "Critical Failure setting up receive buffers\n");
3396                         error = ENOMEM;
3397                         goto err_rx_desc;
3398                 }
3399         }
3400
3401         /*
3402         ** Finally set up the queue holding structs
3403         */
3404         for (int i = 0; i < adapter->num_queues; i++) {
3405                 que = &adapter->queues[i];
3406                 que->adapter = adapter;
3407                 que->txr = &adapter->tx_rings[i];
3408                 que->rxr = &adapter->rx_rings[i];
3409         }
3410
3411         return (0);
3412
3413 err_rx_desc:
3414         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3415                 igb_dma_free(adapter, &rxr->rxdma);
3416 err_tx_desc:
3417         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3418                 igb_dma_free(adapter, &txr->txdma);
3419         free(adapter->rx_rings, M_DEVBUF);
3420 rx_fail:
3421 #ifndef IGB_LEGACY_TX
             /* free any buf rings allocated before the failure */
             for (int i = 0; i < adapter->num_queues; i++)
                     if (adapter->tx_rings[i].br != NULL)
                             buf_ring_free(adapter->tx_rings[i].br, M_DEVBUF);
3423 #endif
3424         free(adapter->tx_rings, M_DEVBUF);
3425 tx_fail:
3426         free(adapter->queues, M_DEVBUF);
3427 fail:
3428         return (error);
3429 }
3430
3431 /*********************************************************************
3432  *
3433  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3434  *  the information needed to transmit a packet on the wire. This is
3435  *  called only once at attach; setup is done on every reset.
3436  *
3437  **********************************************************************/
3438 static int
3439 igb_allocate_transmit_buffers(struct tx_ring *txr)
3440 {
3441         struct adapter *adapter = txr->adapter;
3442         device_t dev = adapter->dev;
3443         struct igb_tx_buffer *txbuf;
3444         int error, i;
3445
3446         /*
3447          * Setup DMA descriptor areas.
3448          */
3449         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3450                                1, 0,                    /* alignment, bounds */
3451                                BUS_SPACE_MAXADDR,       /* lowaddr */
3452                                BUS_SPACE_MAXADDR,       /* highaddr */
3453                                NULL, NULL,              /* filter, filterarg */
3454                                IGB_TSO_SIZE,            /* maxsize */
3455                                IGB_MAX_SCATTER,         /* nsegments */
3456                                PAGE_SIZE,               /* maxsegsize */
3457                                0,                       /* flags */
3458                                NULL,                    /* lockfunc */
3459                                NULL,                    /* lockfuncarg */
3460                                &txr->txtag))) {
3461                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3462                 goto fail;
3463         }
3464
3465         if (!(txr->tx_buffers =
3466             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3467             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3468                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3469                 error = ENOMEM;
3470                 goto fail;
3471         }
3472
3473         /* Create the descriptor buffer dma maps */
3474         txbuf = txr->tx_buffers;
3475         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3476                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3477                 if (error != 0) {
3478                         device_printf(dev, "Unable to create TX DMA map\n");
3479                         goto fail;
3480                 }
3481         }
3482
3483         return 0;
3484 fail:
3485         /* We free everything; this handles the case where we failed midway */
3486         igb_free_transmit_structures(adapter);
3487         return (error);
3488 }
3489
3490 /*********************************************************************
3491  *
3492  *  Initialize a transmit ring.
3493  *
3494  **********************************************************************/
3495 static void
3496 igb_setup_transmit_ring(struct tx_ring *txr)
3497 {
3498         struct adapter *adapter = txr->adapter;
3499         struct igb_tx_buffer *txbuf;
3500         int i;
3501 #ifdef DEV_NETMAP
3502         struct netmap_adapter *na = NA(adapter->ifp);
3503         struct netmap_slot *slot;
3504 #endif /* DEV_NETMAP */
3505
3506         /* Clear the old descriptor contents */
3507         IGB_TX_LOCK(txr);
3508 #ifdef DEV_NETMAP
3509         slot = netmap_reset(na, NR_TX, txr->me, 0);
3510 #endif /* DEV_NETMAP */
3511         bzero((void *)txr->tx_base,
3512               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3513         /* Reset indices */
3514         txr->next_avail_desc = 0;
3515         txr->next_to_clean = 0;
3516
3517         /* Free any existing tx buffers. */
3518         txbuf = txr->tx_buffers;
3519         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3520                 if (txbuf->m_head != NULL) {
3521                         bus_dmamap_sync(txr->txtag, txbuf->map,
3522                             BUS_DMASYNC_POSTWRITE);
3523                         bus_dmamap_unload(txr->txtag, txbuf->map);
3524                         m_freem(txbuf->m_head);
3525                         txbuf->m_head = NULL;
3526                 }
3527 #ifdef DEV_NETMAP
3528                 if (slot) {
3529                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3530                         /* no need to set the address */
3531                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3532                 }
3533 #endif /* DEV_NETMAP */
3534                 /* clear the watch index */
3535                 txbuf->next_eop = -1;
3536         }
3537
3538         /* Set number of descriptors available */
3539         txr->tx_avail = adapter->num_tx_desc;
3540
3541         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3542             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3543         IGB_TX_UNLOCK(txr);
3544 }
3545
3546 /*********************************************************************
3547  *
3548  *  Initialize all transmit rings.
3549  *
3550  **********************************************************************/
3551 static void
3552 igb_setup_transmit_structures(struct adapter *adapter)
3553 {
3554         struct tx_ring *txr = adapter->tx_rings;
3555
3556         for (int i = 0; i < adapter->num_queues; i++, txr++)
3557                 igb_setup_transmit_ring(txr);
3558
3559         return;
3560 }
3561
3562 /*********************************************************************
3563  *
3564  *  Enable transmit unit.
3565  *
3566  **********************************************************************/
3567 static void
3568 igb_initialize_transmit_units(struct adapter *adapter)
3569 {
3570         struct tx_ring  *txr = adapter->tx_rings;
3571         struct e1000_hw *hw = &adapter->hw;
3572         u32             tctl, txdctl;
3573
3574         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3575         tctl = txdctl = 0;
3576
3577         /* Setup the Tx Descriptor Rings */
3578         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3579                 u64 bus_addr = txr->txdma.dma_paddr;
3580
3581                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3582                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3583                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3584                     (uint32_t)(bus_addr >> 32));
3585                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3586                     (uint32_t)bus_addr);
3587
3588                 /* Setup the HW Tx Head and Tail descriptor pointers */
3589                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3590                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3591
3592                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3593                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3594                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3595
3596                 txr->queue_status = IGB_QUEUE_IDLE;
3597
3598                 txdctl |= IGB_TX_PTHRESH;
3599                 txdctl |= IGB_TX_HTHRESH << 8;
3600                 txdctl |= IGB_TX_WTHRESH << 16;
3601                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3602                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3603         }
3604
3605         if (adapter->vf_ifp)
3606                 return;
3607
3608         e1000_config_collision_dist(hw);
3609
3610         /* Program the Transmit Control Register */
3611         tctl = E1000_READ_REG(hw, E1000_TCTL);
3612         tctl &= ~E1000_TCTL_CT;
3613         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3614                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3615
3616         /* This write will effectively turn on the transmit unit. */
3617         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3618 }
3619
3620 /*********************************************************************
3621  *
3622  *  Free all transmit rings.
3623  *
3624  **********************************************************************/
3625 static void
3626 igb_free_transmit_structures(struct adapter *adapter)
3627 {
3628         struct tx_ring *txr = adapter->tx_rings;
3629
3630         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3631                 IGB_TX_LOCK(txr);
3632                 igb_free_transmit_buffers(txr);
3633                 igb_dma_free(adapter, &txr->txdma);
3634                 IGB_TX_UNLOCK(txr);
3635                 IGB_TX_LOCK_DESTROY(txr);
3636         }
3637         free(adapter->tx_rings, M_DEVBUF);
3638 }
3639
3640 /*********************************************************************
3641  *
3642  *  Free transmit ring related data structures.
3643  *
3644  **********************************************************************/
3645 static void
3646 igb_free_transmit_buffers(struct tx_ring *txr)
3647 {
3648         struct adapter *adapter = txr->adapter;
3649         struct igb_tx_buffer *tx_buffer;
3650         int             i;
3651
3652         INIT_DEBUGOUT("free_transmit_ring: begin");
3653
3654         if (txr->tx_buffers == NULL)
3655                 return;
3656
3657         tx_buffer = txr->tx_buffers;
3658         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3659                 if (tx_buffer->m_head != NULL) {
3660                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3661                             BUS_DMASYNC_POSTWRITE);
3662                         bus_dmamap_unload(txr->txtag,
3663                             tx_buffer->map);
3664                         m_freem(tx_buffer->m_head);
3665                         tx_buffer->m_head = NULL;
3666                         if (tx_buffer->map != NULL) {
3667                                 bus_dmamap_destroy(txr->txtag,
3668                                     tx_buffer->map);
3669                                 tx_buffer->map = NULL;
3670                         }
3671                 } else if (tx_buffer->map != NULL) {
3672                         bus_dmamap_unload(txr->txtag,
3673                             tx_buffer->map);
3674                         bus_dmamap_destroy(txr->txtag,
3675                             tx_buffer->map);
3676                         tx_buffer->map = NULL;
3677                 }
3678         }
3679 #ifndef IGB_LEGACY_TX
3680         if (txr->br != NULL)
3681                 buf_ring_free(txr->br, M_DEVBUF);
3682 #endif
3683         if (txr->tx_buffers != NULL) {
3684                 free(txr->tx_buffers, M_DEVBUF);
3685                 txr->tx_buffers = NULL;
3686         }
3687         if (txr->txtag != NULL) {
3688                 bus_dma_tag_destroy(txr->txtag);
3689                 txr->txtag = NULL;
3690         }
3691         return;
3692 }
3693
3694 /**********************************************************************
3695  *
3696  *  Setup work for hardware segmentation offload (TSO)
3697  *
3698  **********************************************************************/
3699 static bool
3700 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3701         struct ip *ip, struct tcphdr *th)
3702 {
3703         struct adapter *adapter = txr->adapter;
3704         struct e1000_adv_tx_context_desc *TXD;
3705         struct igb_tx_buffer        *tx_buffer;
3706         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3707         u32 mss_l4len_idx = 0;
3708         u16 vtag = 0;
3709         int ctxd, ip_hlen, tcp_hlen;
3710
3711         ctxd = txr->next_avail_desc;
3712         tx_buffer = &txr->tx_buffers[ctxd];
3713         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3714
3715         ip->ip_sum = 0;
3716         ip_hlen = ip->ip_hl << 2;
3717         tcp_hlen = th->th_off << 2;
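             /* ip_hl and th_off count 32-bit words; << 2 converts to bytes */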
3718
3719         /* VLAN MACLEN IPLEN */
3720         if (mp->m_flags & M_VLANTAG) {
3721                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3722                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3723         }
3724
3725         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3726         vlan_macip_lens |= ip_hlen;
3727         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3728
3729         /* ADV DTYPE TUCMD */
3730         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3731         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3732         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3733         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3734
3735         /* MSS L4LEN IDX */
3736         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3737         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3738         /* 82575 needs the queue index added */
3739         if (adapter->hw.mac.type == e1000_82575)
3740                 mss_l4len_idx |= txr->me << 4;
3741         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3742
3743         TXD->seqnum_seed = htole32(0);
3744         tx_buffer->m_head = NULL;
3745         tx_buffer->next_eop = -1;
3746
3747         if (++ctxd == adapter->num_tx_desc)
3748                 ctxd = 0;
3749
3750         txr->tx_avail--;
3751         txr->next_avail_desc = ctxd;
3752         return TRUE;
3753 }
3754
3755
3756 /*********************************************************************
3757  *
3758  *  Context Descriptor setup for VLAN or CSUM
3759  *
3760  **********************************************************************/
3761
3762 static bool
3763 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3764 {
3765         struct adapter *adapter = txr->adapter;
3766         struct e1000_adv_tx_context_desc *TXD;
3767         struct igb_tx_buffer        *tx_buffer;
3768         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3769         struct ether_vlan_header *eh;
3770         struct ip *ip = NULL;
3771         struct ip6_hdr *ip6;
3772         int  ehdrlen, ctxd, ip_hlen = 0;
3773         u16     etype, vtag = 0;
3774         u8      ipproto = 0;
3775         bool    offload = TRUE;
3776
3777         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3778                 offload = FALSE;
3779
3780         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3781         ctxd = txr->next_avail_desc;
3782         tx_buffer = &txr->tx_buffers[ctxd];
3783         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3784
3785         /*
3786         ** In advanced descriptors the vlan tag must 
3787         ** be placed into the context descriptor, thus
3788         ** we need to be here just for that setup.
3789         */
3790         if (mp->m_flags & M_VLANTAG) {
3791                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3792                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3793         } else if (offload == FALSE)
3794                 return FALSE;
3795
3796         /*
3797          * Determine where frame payload starts.
3798          * Jump over vlan headers if already present,
3799          * helpful for QinQ too.
3800          */
3801         eh = mtod(mp, struct ether_vlan_header *);
3802         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3803                 etype = ntohs(eh->evl_proto);
3804                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3805         } else {
3806                 etype = ntohs(eh->evl_encap_proto);
3807                 ehdrlen = ETHER_HDR_LEN;
3808         }
3809
3810         /* Set the ether header length */
3811         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3812
3813         switch (etype) {
3814                 case ETHERTYPE_IP:
3815                         ip = (struct ip *)(mp->m_data + ehdrlen);
3816                         ip_hlen = ip->ip_hl << 2;
3817                         if (mp->m_len < ehdrlen + ip_hlen) {
3818                                 offload = FALSE;
3819                                 break;
3820                         }
3821                         ipproto = ip->ip_p;
3822                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3823                         break;
3824                 case ETHERTYPE_IPV6:
3825                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3826                         ip_hlen = sizeof(struct ip6_hdr);
3827                         ipproto = ip6->ip6_nxt;
3828                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3829                         break;
3830                 default:
3831                         offload = FALSE;
3832                         break;
3833         }
3834
3835         vlan_macip_lens |= ip_hlen;
3836         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3837
3838         switch (ipproto) {
3839                 case IPPROTO_TCP:
3840                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3841                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3842                         break;
3843                 case IPPROTO_UDP:
3844                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3845                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3846                         break;
3847 #if __FreeBSD_version >= 800000
3848                 case IPPROTO_SCTP:
3849                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3850                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3851                         break;
3852 #endif
3853                 default:
3854                         offload = FALSE;
3855                         break;
3856         }
3857
3858         /* 82575 needs the queue index added */
3859         if (adapter->hw.mac.type == e1000_82575)
3860                 mss_l4len_idx = txr->me << 4;
3861
3862         /* Now copy bits into descriptor */
3863         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3864         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3865         TXD->seqnum_seed = htole32(0);
3866         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3867
3868         tx_buffer->m_head = NULL;
3869         tx_buffer->next_eop = -1;
3870
3871         /* We've consumed the first desc, adjust counters */
3872         if (++ctxd == adapter->num_tx_desc)
3873                 ctxd = 0;
3874         txr->next_avail_desc = ctxd;
3875         --txr->tx_avail;
3876
3877         return (offload);
3878 }
3879
3880
3881 /**********************************************************************
3882  *
3883  *  Examine each tx_buffer in the used queue. If the hardware is done
3884  *  processing the packet then free associated resources. The
3885  *  tx_buffer is put back on the free queue.
3886  *
3887  *  A TRUE return means there's work in the ring to clean, FALSE means it's empty.
3888  **********************************************************************/
3889 static bool
3890 igb_txeof(struct tx_ring *txr)
3891 {
3892         struct adapter  *adapter = txr->adapter;
3893         int first, last, done, processed;
3894         struct igb_tx_buffer *tx_buffer;
3895         struct e1000_tx_desc   *tx_desc, *eop_desc;
3896         struct ifnet   *ifp = adapter->ifp;
3897
3898         IGB_TX_LOCK_ASSERT(txr);
3899
3900 #ifdef DEV_NETMAP
3901         if (netmap_tx_irq(ifp, txr->me |
3902             (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
3903                 return (FALSE);
3904 #endif /* DEV_NETMAP */
3905         if (txr->tx_avail == adapter->num_tx_desc) {
3906                 txr->queue_status = IGB_QUEUE_IDLE;
3907                 return FALSE;
3908         }
3909
3910         processed = 0;
3911         first = txr->next_to_clean;
3912         tx_desc = &txr->tx_base[first];
3913         tx_buffer = &txr->tx_buffers[first];
3914         last = tx_buffer->next_eop;
3915         eop_desc = &txr->tx_base[last];
3916
3917         /*
3918          * What this does is get the index of the
3919          * first descriptor AFTER the EOP of the 
3920          * first packet, that way we can do the
3921          * simple comparison on the inner while loop.
3922          */
3923         if (++last == adapter->num_tx_desc)
3924                 last = 0;
3925         done = last;
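             /*
              * e.g. a packet occupying descriptors 10..12 has next_eop
              * 12, so done becomes 13 and the inner loop below cleans
              * exactly 10, 11 and 12.
              */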
3926
3927         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3928             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3929
3930         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3931                 /* We clean the range of the packet */
3932                 while (first != done) {
3933                         tx_desc->upper.data = 0;
3934                         tx_desc->lower.data = 0;
3935                         tx_desc->buffer_addr = 0;
3936                         ++txr->tx_avail;
3937                         ++processed;
3938
3939                         if (tx_buffer->m_head) {
3940                                 txr->bytes +=
3941                                     tx_buffer->m_head->m_pkthdr.len;
3942                                 bus_dmamap_sync(txr->txtag,
3943                                     tx_buffer->map,
3944                                     BUS_DMASYNC_POSTWRITE);
3945                                 bus_dmamap_unload(txr->txtag,
3946                                     tx_buffer->map);
3947
3948                                 m_freem(tx_buffer->m_head);
3949                                 tx_buffer->m_head = NULL;
3950                         }
3951                         tx_buffer->next_eop = -1;
3952                         txr->watchdog_time = ticks;
3953
3954                         if (++first == adapter->num_tx_desc)
3955                                 first = 0;
3956
3957                         tx_buffer = &txr->tx_buffers[first];
3958                         tx_desc = &txr->tx_base[first];
3959                 }
3960                 ++txr->packets;
3961                 ++ifp->if_opackets;
3962                 /* See if we can continue to the next packet */
3963                 last = tx_buffer->next_eop;
3964                 if (last != -1) {
3965                         eop_desc = &txr->tx_base[last];
3966                         /* Get new done point */
3967                         if (++last == adapter->num_tx_desc) last = 0;
3968                         done = last;
3969                 } else
3970                         break;
3971         }
3972         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3973             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3974
3975         txr->next_to_clean = first;
3976
3977         /*
3978         ** Watchdog calculation: we know there's work
3979         ** outstanding, or the early return above would
3980         ** have been taken; if nothing was processed for
3981         ** too long, the queue is likely hung.
3982         */
3983         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3984                 txr->queue_status |= IGB_QUEUE_HUNG;
3985         /*
3986          * If we have a minimum free,
3987          * clear depleted state bit
3988          */
3989         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3990                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3991
3992         /* All clean, turn off the watchdog */
3993         if (txr->tx_avail == adapter->num_tx_desc) {
3994                 txr->queue_status = IGB_QUEUE_IDLE;
3995                 return (FALSE);
3996         }
3997
3998         return (TRUE);
3999 }
4000
4001 /*********************************************************************
4002  *
4003  *  Refresh mbuf buffers for RX descriptor rings
4004  *   - now keeps its own state so discards due to resource
4005  *     exhaustion are unnecessary; if an mbuf cannot be obtained
4006  *     it just returns, keeping its placeholder, so it can simply
4007  *     be called again to retry.
4008  *
4009  **********************************************************************/
4010 static void
4011 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4012 {
4013         struct adapter          *adapter = rxr->adapter;
4014         bus_dma_segment_t       hseg[1];
4015         bus_dma_segment_t       pseg[1];
4016         struct igb_rx_buf       *rxbuf;
4017         struct mbuf             *mh, *mp;
4018         int                     i, j, nsegs, error;
4019         bool                    refreshed = FALSE;
4020
4021         i = j = rxr->next_to_refresh;
4022         /*
4023         ** Get one descriptor beyond
4024         ** our work mark to control
4025         ** the loop.
4026         */
4027         if (++j == adapter->num_rx_desc)
4028                 j = 0;
4029
4030         while (j != limit) {
4031                 rxbuf = &rxr->rx_buffers[i];
4032                 /* No hdr mbuf used with header split off */
4033                 if (rxr->hdr_split == FALSE)
4034                         goto no_split;
4035                 if (rxbuf->m_head == NULL) {
4036                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4037                         if (mh == NULL)
4038                                 goto update;
4039                 } else
4040                         mh = rxbuf->m_head;
4041
4042                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4044                 mh->m_flags |= M_PKTHDR;
4045                 /* Get the memory mapping */
4046                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4047                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4048                 if (error != 0) {
4049                         printf("Refresh mbufs: hdr dmamap load"
4050                             " failure - %d\n", error);
4051                         m_free(mh);
4052                         rxbuf->m_head = NULL;
4053                         goto update;
4054                 }
4055                 rxbuf->m_head = mh;
4056                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4057                     BUS_DMASYNC_PREREAD);
4058                 rxr->rx_base[i].read.hdr_addr =
4059                     htole64(hseg[0].ds_addr);
4060 no_split:
4061                 if (rxbuf->m_pack == NULL) {
4062                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4063                             M_PKTHDR, adapter->rx_mbuf_sz);
4064                         if (mp == NULL)
4065                                 goto update;
4066                 } else
4067                         mp = rxbuf->m_pack;
4068
4069                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4070                 /* Get the memory mapping */
4071                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4072                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4073                 if (error != 0) {
4074                         printf("Refresh mbufs: payload dmamap load"
4075                             " failure - %d\n", error);
4076                         m_free(mp);
4077                         rxbuf->m_pack = NULL;
4078                         goto update;
4079                 }
4080                 rxbuf->m_pack = mp;
4081                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4082                     BUS_DMASYNC_PREREAD);
4083                 rxr->rx_base[i].read.pkt_addr =
4084                     htole64(pseg[0].ds_addr);
4085                 refreshed = TRUE; /* I feel wefreshed :) */
4086
4087                 i = j; /* our next is precalculated */
4088                 rxr->next_to_refresh = i;
4089                 if (++j == adapter->num_rx_desc)
4090                         j = 0;
4091         }
4092 update:
4093         if (refreshed) /* update tail */
4094                 E1000_WRITE_REG(&adapter->hw,
4095                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4096         return;
4097 }
4098
4099
4100 /*********************************************************************
4101  *
4102  *  Allocate memory for rx_buffer structures. Since we use one
4103  *  rx_buffer per received packet, the maximum number of rx_buffer's
4104  *  that we'll need is equal to the number of receive descriptors
4105  *  that we've allocated.
4106  *
4107  **********************************************************************/
4108 static int
4109 igb_allocate_receive_buffers(struct rx_ring *rxr)
4110 {
4111         struct  adapter         *adapter = rxr->adapter;
4112         device_t                dev = adapter->dev;
4113         struct igb_rx_buf       *rxbuf;
4114         int                     i, bsize, error;
4115
4116         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4117         if (!(rxr->rx_buffers =
4118             (struct igb_rx_buf *) malloc(bsize,
4119             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4120                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4121                 error = ENOMEM;
4122                 goto fail;
4123         }
4124
4125         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4126                                    1, 0,                /* alignment, bounds */
4127                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4128                                    BUS_SPACE_MAXADDR,   /* highaddr */
4129                                    NULL, NULL,          /* filter, filterarg */
4130                                    MSIZE,               /* maxsize */
4131                                    1,                   /* nsegments */
4132                                    MSIZE,               /* maxsegsize */
4133                                    0,                   /* flags */
4134                                    NULL,                /* lockfunc */
4135                                    NULL,                /* lockfuncarg */
4136                                    &rxr->htag))) {
4137                 device_printf(dev, "Unable to create RX DMA tag\n");
4138                 goto fail;
4139         }
4140
4141         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4142                                    1, 0,                /* alignment, bounds */
4143                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4144                                    BUS_SPACE_MAXADDR,   /* highaddr */
4145                                    NULL, NULL,          /* filter, filterarg */
4146                                    MJUM9BYTES,          /* maxsize */
4147                                    1,                   /* nsegments */
4148                                    MJUM9BYTES,          /* maxsegsize */
4149                                    0,                   /* flags */
4150                                    NULL,                /* lockfunc */
4151                                    NULL,                /* lockfuncarg */
4152                                    &rxr->ptag))) {
4153                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4154                 goto fail;
4155         }
4156
4157         for (i = 0; i < adapter->num_rx_desc; i++) {
4158                 rxbuf = &rxr->rx_buffers[i];
4159                 error = bus_dmamap_create(rxr->htag,
4160                     BUS_DMA_NOWAIT, &rxbuf->hmap);
4161                 if (error) {
4162                         device_printf(dev,
4163                             "Unable to create RX head DMA maps\n");
4164                         goto fail;
4165                 }
4166                 error = bus_dmamap_create(rxr->ptag,
4167                     BUS_DMA_NOWAIT, &rxbuf->pmap);
4168                 if (error) {
4169                         device_printf(dev,
4170                             "Unable to create RX packet DMA maps\n");
4171                         goto fail;
4172                 }
4173         }
4174
4175         return (0);
4176
4177 fail:
4178         /* Frees all, but can handle partial completion */
4179         igb_free_receive_structures(adapter);
4180         return (error);
4181 }
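
/*
 * For reference, the sizes behind the two tags above (assuming stock
 * FreeBSD parameters): MSIZE is 256 bytes, so a header lands in a
 * single plain mbuf, while MJUM9BYTES is 9216 bytes (one 9k jumbo
 * cluster), so even a full jumbo payload fits in one segment; hence
 * nsegments is 1 in both tags.
 */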
4182
4183
4184 static void
4185 igb_free_receive_ring(struct rx_ring *rxr)
4186 {
4187         struct  adapter         *adapter = rxr->adapter;
4188         struct igb_rx_buf       *rxbuf;
4189
4190
4191         for (int i = 0; i < adapter->num_rx_desc; i++) {
4192                 rxbuf = &rxr->rx_buffers[i];
4193                 if (rxbuf->m_head != NULL) {
4194                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4195                             BUS_DMASYNC_POSTREAD);
4196                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4197                         rxbuf->m_head->m_flags |= M_PKTHDR;
4198                         m_freem(rxbuf->m_head);
4199                 }
4200                 if (rxbuf->m_pack != NULL) {
4201                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4202                             BUS_DMASYNC_POSTREAD);
4203                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4204                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4205                         m_freem(rxbuf->m_pack);
4206                 }
4207                 rxbuf->m_head = NULL;
4208                 rxbuf->m_pack = NULL;
4209         }
4210 }
4211
4212
4213 /*********************************************************************
4214  *
4215  *  Initialize a receive ring and its buffers.
4216  *
4217  **********************************************************************/
4218 static int
4219 igb_setup_receive_ring(struct rx_ring *rxr)
4220 {
4221         struct  adapter         *adapter;
4222         struct  ifnet           *ifp;
4223         device_t                dev;
4224         struct igb_rx_buf       *rxbuf;
4225         bus_dma_segment_t       pseg[1], hseg[1];
4226         struct lro_ctrl         *lro = &rxr->lro;
4227         int                     rsize, nsegs, error = 0;
4228 #ifdef DEV_NETMAP
4229         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4230         struct netmap_slot *slot;
4231 #endif /* DEV_NETMAP */
4232
4233         adapter = rxr->adapter;
4234         dev = adapter->dev;
4235         ifp = adapter->ifp;
4236
4237         /* Clear the ring contents */
4238         IGB_RX_LOCK(rxr);
4239 #ifdef DEV_NETMAP
4240         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4241 #endif /* DEV_NETMAP */
4242         rsize = roundup2(adapter->num_rx_desc *
4243             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4244         bzero((void *)rxr->rx_base, rsize);
4245
4246         /*
4247         ** Free current RX buffer structures and their mbufs
4248         */
4249         igb_free_receive_ring(rxr);
4250
4251         /* Configure for header split? */
4252         if (igb_header_split)
4253                 rxr->hdr_split = TRUE;
4254
4255         /* Now replenish the ring mbufs */
4256         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4257                 struct mbuf     *mh, *mp;
4258
4259                 rxbuf = &rxr->rx_buffers[j];
4260 #ifdef DEV_NETMAP
4261                 if (slot) {
4262                         /* slot sj is mapped to the j-th NIC-ring entry */
4263                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4264                         uint64_t paddr;
4265                         void *addr;
4266
4267                         addr = PNMB(slot + sj, &paddr);
4268                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4269                         /* Update descriptor */
4270                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4271                         continue;
4272                 }
4273 #endif /* DEV_NETMAP */
4274                 if (rxr->hdr_split == FALSE)
4275                         goto skip_head;
4276
4277                 /* First the header */
4278                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4279                 if (rxbuf->m_head == NULL) {
4280                         error = ENOBUFS;
4281                         goto fail;
4282                 }
4283                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4284                 mh = rxbuf->m_head;
4285                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4286                 mh->m_flags |= M_PKTHDR;
4287                 /* Get the memory mapping */
4288                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4289                     rxbuf->hmap, rxbuf->m_head, hseg,
4290                     &nsegs, BUS_DMA_NOWAIT);
4291                 if (error != 0) /* Nothing elegant to do here */
4292                         goto fail;
4293                 bus_dmamap_sync(rxr->htag,
4294                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4295                 /* Update descriptor */
4296                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4297
4298 skip_head:
4299                 /* Now the payload cluster */
4300                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4301                     M_PKTHDR, adapter->rx_mbuf_sz);
4302                 if (rxbuf->m_pack == NULL) {
4303                         error = ENOBUFS;
4304                         goto fail;
4305                 }
4306                 mp = rxbuf->m_pack;
4307                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4308                 /* Get the memory mapping */
4309                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4310                     rxbuf->pmap, mp, pseg,
4311                     &nsegs, BUS_DMA_NOWAIT);
4312                 if (error != 0)
4313                         goto fail;
4314                 bus_dmamap_sync(rxr->ptag,
4315                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4316                 /* Update descriptor */
4317                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4318         }
4319
4320         /* Setup our descriptor indices */
4321         rxr->next_to_check = 0;
4322         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4323         rxr->lro_enabled = FALSE;
4324         rxr->rx_split_packets = 0;
4325         rxr->rx_bytes = 0;
4326
4327         rxr->fmp = NULL;
4328         rxr->lmp = NULL;
4329         rxr->discard = FALSE;
4330
4331         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4332             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4333
4334         /*
4335         ** Now set up the LRO interface; note that
4336         ** header split is only done when LRO is
4337         ** enabled, since on its own it is rarely
4338         ** worthwhile.
4339         */
4340         if (ifp->if_capenable & IFCAP_LRO) {
4341                 error = tcp_lro_init(lro);
4342                 if (error) {
4343                         device_printf(dev, "LRO Initialization failed!\n");
4344                         goto fail;
4345                 }
4346                 INIT_DEBUGOUT("RX LRO Initialized\n");
4347                 rxr->lro_enabled = TRUE;
4348                 lro->ifp = adapter->ifp;
4349         }
4350
4351         IGB_RX_UNLOCK(rxr);
4352         return (0);
4353
4354 fail:
4355         igb_free_receive_ring(rxr);
4356         IGB_RX_UNLOCK(rxr);
4357         return (error);
4358 }
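
/*
 * For reference, the "read" view of union e1000_adv_rx_desc that the
 * loop above programs (field names as used in this file): hdr_addr
 * receives the header buffer's physical address (header split only)
 * and pkt_addr the payload buffer's; on completion the hardware
 * overwrites the same 16 bytes with the "wb" (writeback) view that
 * igb_rxeof() parses.
 */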
4359
4360
4361 /*********************************************************************
4362  *
4363  *  Initialize all receive rings.
4364  *
4365  **********************************************************************/
4366 static int
4367 igb_setup_receive_structures(struct adapter *adapter)
4368 {
4369         struct rx_ring *rxr = adapter->rx_rings;
4370         int i;
4371
4372         for (i = 0; i < adapter->num_queues; i++, rxr++)
4373                 if (igb_setup_receive_ring(rxr))
4374                         goto fail;
4375
4376         return (0);
4377 fail:
4378         /*
4379          * Free RX buffers allocated so far, we will only handle
4380          * the rings that completed, the failing case will have
4381          * cleaned up for itself. 'i' is the endpoint.
4382          */
4383         for (int j = 0; j < i; ++j) {
4384                 rxr = &adapter->rx_rings[j];
4385                 IGB_RX_LOCK(rxr);
4386                 igb_free_receive_ring(rxr);
4387                 IGB_RX_UNLOCK(rxr);
4388         }
4389
4390         return (ENOBUFS);
4391 }
4392
4393 /*********************************************************************
4394  *
4395  *  Enable receive unit.
4396  *
4397  **********************************************************************/
4398 static void
4399 igb_initialize_receive_units(struct adapter *adapter)
4400 {
4401         struct rx_ring  *rxr = adapter->rx_rings;
4402         struct ifnet    *ifp = adapter->ifp;
4403         struct e1000_hw *hw = &adapter->hw;
4404         u32             rctl, rxcsum, psize, srrctl = 0;
4405
4406         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4407
4408         /*
4409          * Make sure receives are disabled while setting
4410          * up the descriptor ring
4411          */
4412         rctl = E1000_READ_REG(hw, E1000_RCTL);
4413         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4414
4415         /*
4416         ** Set up for header split
4417         */
4418         if (igb_header_split) {
4419                 /* Use a standard mbuf for the header */
4420                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4421                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4422         } else
4423                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4424
4425         /*
4426         ** Set up for jumbo frames
4427         */
4428         if (ifp->if_mtu > ETHERMTU) {
4429                 rctl |= E1000_RCTL_LPE;
4430                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4431                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4432                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4433                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4434                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4435                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4436                 }
4437                 /* Set maximum packet len */
4438                 psize = adapter->max_frame_size;
4439                 /* are we on a vlan? */
4440                 if (adapter->ifp->if_vlantrunk != NULL)
4441                         psize += VLAN_TAG_SIZE;
4442                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4443         } else {
4444                 rctl &= ~E1000_RCTL_LPE;
4445                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4446                 rctl |= E1000_RCTL_SZ_2048;
4447         }
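
        /*
         * A worked example of the encoding above, assuming the usual
         * E1000_SRRCTL_BSIZEPKT_SHIFT of 10: the SRRCTL buffer size is
         * given in 1 KB units, so 4096 >> 10 == 4 programs a 4 KB
         * buffer, matching the RCTL_SZ_4096 | RCTL_BSEX legacy setting.
         */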
4448
4449         /* Setup the Base and Length of the Rx Descriptor Rings */
4450         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4451                 u64 bus_addr = rxr->rxdma.dma_paddr;
4452                 u32 rxdctl;
4453
4454                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4455                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4456                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4457                     (uint32_t)(bus_addr >> 32));
4458                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4459                     (uint32_t)bus_addr);
4460                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4461                 /* Enable this Queue */
4462                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4463                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4464                 rxdctl &= 0xFFF00000;
4465                 rxdctl |= IGB_RX_PTHRESH;
4466                 rxdctl |= IGB_RX_HTHRESH << 8;
4467                 rxdctl |= IGB_RX_WTHRESH << 16;
4468                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4469         }
4470
4471         /*
4472         ** Setup for RX MultiQueue
4473         */
4474         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4475         if (adapter->num_queues > 1) {
4476                 u32 random[10], mrqc, shift = 0;
4477                 union igb_reta {
4478                         u32 dword;
4479                         u8  bytes[4];
4480                 } reta;
4481
4482                 arc4rand(&random, sizeof(random), 0);
4483                 if (adapter->hw.mac.type == e1000_82575)
4484                         shift = 6;
4485                 /* Populate the 128-entry RSS redirection table */
4486                 for (int i = 0; i < 128; i++) {
4487                         reta.bytes[i & 3] =
4488                             (i % adapter->num_queues) << shift;
4489                         if ((i & 3) == 3)
4490                                 E1000_WRITE_REG(hw,
4491                                     E1000_RETA(i >> 2), reta.dword);
4492                 }
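                /*
                 * The 128 one-byte RETA entries are packed four per
                 * 32-bit register, so the write above fires on every
                 * fourth entry ((i & 3) == 3). Ignoring the 82575
                 * shift, num_queues == 4 yields the repeating pattern
                 * 0,1,2,3 across all 128 slots.
                 */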
4493                 /* Now fill in hash table */
4494                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4495                 for (int i = 0; i < 10; i++)
4496                         E1000_WRITE_REG_ARRAY(hw,
4497                             E1000_RSSRK(0), i, random[i]);
4498
4499                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4500                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4501                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4502                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4503                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4504                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4505                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4506                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4507
4508                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4509
4510                 /*
4511                 ** NOTE: Receive Full-Packet Checksum Offload
4512                 ** is mutually exclusive with Multiqueue; this
4513                 ** is distinct from per-protocol TCP/IP checksum
4514                 ** offload, which continues to work.
4515                 */
4516                 rxcsum |= E1000_RXCSUM_PCSD;
4517 #if __FreeBSD_version >= 800000
4518                 /* For SCTP Offload */
4519                 if ((hw->mac.type == e1000_82576)
4520                     && (ifp->if_capenable & IFCAP_RXCSUM))
4521                         rxcsum |= E1000_RXCSUM_CRCOFL;
4522 #endif
4523         } else {
4524                 /* Non RSS setup */
4525                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4526                         rxcsum |= E1000_RXCSUM_IPPCSE;
4527 #if __FreeBSD_version >= 800000
4528                         if (adapter->hw.mac.type == e1000_82576)
4529                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4530 #endif
4531                 } else
4532                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4533         }
4534         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4535
4536         /* Setup the Receive Control Register */
4537         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4538         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4539                    E1000_RCTL_RDMTS_HALF |
4540                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4541         /* Strip CRC bytes. */
4542         rctl |= E1000_RCTL_SECRC;
4543         /* Make sure VLAN Filters are off */
4544         rctl &= ~E1000_RCTL_VFE;
4545         /* Don't store bad packets */
4546         rctl &= ~E1000_RCTL_SBP;
4547
4548         /* Enable Receives */
4549         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4550
4551         /*
4552          * Setup the HW Rx Head and Tail Descriptor Pointers
4553          *   - needs to be after enable
4554          */
4555         for (int i = 0; i < adapter->num_queues; i++) {
4556                 rxr = &adapter->rx_rings[i];
4557                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4558 #ifdef DEV_NETMAP
4559                 /*
4560                  * An init() while a netmap client is active must
4561                  * preserve the rx buffers passed to userspace.
4562                  * In this driver it means we adjust RDT to
4563                  * something different from next_to_refresh
4564                  * (which is not used in netmap mode).
4565                  */
4566                 if (ifp->if_capenable & IFCAP_NETMAP) {
4567                         struct netmap_adapter *na = NA(adapter->ifp);
4568                         struct netmap_kring *kring = &na->rx_rings[i];
4569                         int t = rxr->next_to_refresh - kring->nr_hwavail;
4570
4571                         if (t >= adapter->num_rx_desc)
4572                                 t -= adapter->num_rx_desc;
4573                         else if (t < 0)
4574                                 t += adapter->num_rx_desc;
4575                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4576                 } else
4577 #endif /* DEV_NETMAP */
4578                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4579         }
4580         return;
4581 }
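
/*
 * For reference (head/tail convention programmed above): the hardware
 * may DMA into descriptors from RDH up to, but not including, RDT;
 * writing RDT == next_to_refresh hands every refreshed slot back to
 * the NIC, while RDH advances as frames complete.
 */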
4582
4583 /*********************************************************************
4584  *
4585  *  Free receive rings.
4586  *
4587  **********************************************************************/
4588 static void
4589 igb_free_receive_structures(struct adapter *adapter)
4590 {
4591         struct rx_ring *rxr = adapter->rx_rings;
4592
4593         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4594                 struct lro_ctrl *lro = &rxr->lro;
4595                 igb_free_receive_buffers(rxr);
4596                 tcp_lro_free(lro);
4597                 igb_dma_free(adapter, &rxr->rxdma);
4598         }
4599
4600         free(adapter->rx_rings, M_DEVBUF);
4601 }
4602
4603 /*********************************************************************
4604  *
4605  *  Free receive ring data structures.
4606  *
4607  **********************************************************************/
4608 static void
4609 igb_free_receive_buffers(struct rx_ring *rxr)
4610 {
4611         struct adapter          *adapter = rxr->adapter;
4612         struct igb_rx_buf       *rxbuf;
4613         int i;
4614
4615         INIT_DEBUGOUT("free_receive_structures: begin");
4616
4617         /* Cleanup any existing buffers */
4618         if (rxr->rx_buffers != NULL) {
4619                 for (i = 0; i < adapter->num_rx_desc; i++) {
4620                         rxbuf = &rxr->rx_buffers[i];
4621                         if (rxbuf->m_head != NULL) {
4622                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4623                                     BUS_DMASYNC_POSTREAD);
4624                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4625                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4626                                 m_freem(rxbuf->m_head);
4627                         }
4628                         if (rxbuf->m_pack != NULL) {
4629                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4630                                     BUS_DMASYNC_POSTREAD);
4631                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4632                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4633                                 m_freem(rxbuf->m_pack);
4634                         }
4635                         rxbuf->m_head = NULL;
4636                         rxbuf->m_pack = NULL;
4637                         if (rxbuf->hmap != NULL) {
4638                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4639                                 rxbuf->hmap = NULL;
4640                         }
4641                         if (rxbuf->pmap != NULL) {
4642                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4643                                 rxbuf->pmap = NULL;
4644                         }
4645                 }
4646                 if (rxr->rx_buffers != NULL) {
4647                         free(rxr->rx_buffers, M_DEVBUF);
4648                         rxr->rx_buffers = NULL;
4649                 }
4650         }
4651
4652         if (rxr->htag != NULL) {
4653                 bus_dma_tag_destroy(rxr->htag);
4654                 rxr->htag = NULL;
4655         }
4656         if (rxr->ptag != NULL) {
4657                 bus_dma_tag_destroy(rxr->ptag);
4658                 rxr->ptag = NULL;
4659         }
4660 }
4661
4662 static __inline void
4663 igb_rx_discard(struct rx_ring *rxr, int i)
4664 {
4665         struct igb_rx_buf       *rbuf;
4666
4667         rbuf = &rxr->rx_buffers[i];
4668
4669         /* Partially received? Free the chain */
4670         if (rxr->fmp != NULL) {
4671                 rxr->fmp->m_flags |= M_PKTHDR;
4672                 m_freem(rxr->fmp);
4673                 rxr->fmp = NULL;
4674                 rxr->lmp = NULL;
4675         }
4676
4677         /*
4678         ** With advanced descriptors the writeback
4679                 ** clobbers the buffer addresses, so it is
4680                 ** easier to just free the existing mbufs and
4681                 ** take the normal refresh path to get new
4682                 ** buffers and mappings.
4683         */
4684         if (rbuf->m_head) {
4685                 m_free(rbuf->m_head);
4686                 rbuf->m_head = NULL;
4687         }
4688
4689         if (rbuf->m_pack) {
4690                 m_free(rbuf->m_pack);
4691                 rbuf->m_pack = NULL;
4692         }
4693
4694         return;
4695 }
4696
4697 static __inline void
4698 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4699 {
4700
4701         /*
4702          * At the moment LRO is only done for IPv4/TCP packets whose TCP
4703          * checksum has been verified by hardware and which carry no VLAN
4704          * tag in the Ethernet header.
4705          */
4706         if (rxr->lro_enabled &&
4707             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4708             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4709             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4710             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4711             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4712             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4713                 /*
4714                  * Send to the stack if:
4715                  *  - LRO not enabled, or
4716                  *  - no LRO resources, or
4717                  *  - LRO enqueue fails
4718                  */
4719                 if (rxr->lro.lro_cnt != 0)
4720                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4721                                 return;
4722         }
4723         IGB_RX_UNLOCK(rxr);
4724         (*ifp->if_input)(ifp, m);
4725         IGB_RX_LOCK(rxr);
4726 }
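
/*
 * Note the unlock/input/lock sequence above: the RX lock is dropped
 * around if_input() so the stack may re-enter the driver (for example
 * to transmit an ACK) without recursing on the ring lock.
 */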
4727
4728 /*********************************************************************
4729  *
4730  *  This routine executes in interrupt context. It replenishes
4731  *  the mbufs in the descriptor ring and sends data which has
4732  *  been DMA'd into host memory to the upper layer.
4733  *
4734  *  We loop at most count times if count is > 0, or until done if
4735  *  count < 0.
4736  *
4737  *  Return TRUE if more to clean, FALSE otherwise
4738  *********************************************************************/
4739 static bool
4740 igb_rxeof(struct igb_queue *que, int count, int *done)
4741 {
4742         struct adapter          *adapter = que->adapter;
4743         struct rx_ring          *rxr = que->rxr;
4744         struct ifnet            *ifp = adapter->ifp;
4745         struct lro_ctrl         *lro = &rxr->lro;
4746         struct lro_entry        *queued;
4747         int                     i, processed = 0, rxdone = 0;
4748         u32                     ptype, staterr = 0;
4749         union e1000_adv_rx_desc *cur;
4750
4751         IGB_RX_LOCK(rxr);
4752         /* Sync the ring. */
4753         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4754             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4755
4756 #ifdef DEV_NETMAP
4757         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4758                 return (FALSE);
4759 #endif /* DEV_NETMAP */
4760
4761         /* Main clean loop */
4762         for (i = rxr->next_to_check; count != 0;) {
4763                 struct mbuf             *sendmp, *mh, *mp;
4764                 struct igb_rx_buf       *rxbuf;
4765                 u16                     hlen, plen, hdr, vtag;
4766                 bool                    eop = FALSE;
4767  
4768                 cur = &rxr->rx_base[i];
4769                 staterr = le32toh(cur->wb.upper.status_error);
4770                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4771                         break;
4772                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4773                         break;
4774                 count--;
4775                 sendmp = mh = mp = NULL;
4776                 cur->wb.upper.status_error = 0;
4777                 rxbuf = &rxr->rx_buffers[i];
4778                 plen = le16toh(cur->wb.upper.length);
4779                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4780                 if ((adapter->hw.mac.type == e1000_i350) &&
4781                     (staterr & E1000_RXDEXT_STATERR_LB))
4782                         vtag = be16toh(cur->wb.upper.vlan);
4783                 else
4784                         vtag = le16toh(cur->wb.upper.vlan);
4785                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4786                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4787
4788                 /* Make sure all segments of a bad packet are discarded */
4789                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4790                     (rxr->discard)) {
4791                         adapter->dropped_pkts++;
4792                         ++rxr->rx_discarded;
4793                         if (!eop) /* Catch subsequent segs */
4794                                 rxr->discard = TRUE;
4795                         else
4796                                 rxr->discard = FALSE;
4797                         igb_rx_discard(rxr, i);
4798                         goto next_desc;
4799                 }
4800
4801                 /*
4802                 ** The way the hardware is configured to
4803                 ** split, it will ONLY use the header buffer
4804                 ** when header split is enabled, otherwise we
4805                 ** get normal behavior, ie, both header and
4806                 ** payload are DMA'd into the payload buffer.
4807                 **
4808                 ** The fmp test is to catch the case where a
4809                 ** packet spans multiple descriptors, in that
4810                 ** case only the first header is valid.
4811                 */
4812                 if (rxr->hdr_split && rxr->fmp == NULL) {
4813                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4814                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4815                         if (hlen > IGB_HDR_BUF)
4816                                 hlen = IGB_HDR_BUF;
4817                         mh = rxr->rx_buffers[i].m_head;
4818                         mh->m_len = hlen;
4819                         /* clear buf pointer for refresh */
4820                         rxbuf->m_head = NULL;
4821                         /*
4822                         ** Get the payload length, this
4823                         ** could be zero if its a small
4824                         ** packet.
4825                         */
4826                         if (plen > 0) {
4827                                 mp = rxr->rx_buffers[i].m_pack;
4828                                 mp->m_len = plen;
4829                                 mh->m_next = mp;
4830                                 /* clear buf pointer */
4831                                 rxbuf->m_pack = NULL;
4832                                 rxr->rx_split_packets++;
4833                         }
4834                 } else {
4835                         /*
4836                         ** Either no header split, or a
4837                         ** secondary piece of a fragmented
4838                         ** split packet.
4839                         */
4840                         mh = rxr->rx_buffers[i].m_pack;
4841                         mh->m_len = plen;
4842                         /* clear buf info for refresh */
4843                         rxbuf->m_pack = NULL;
4844                 }
4845
4846                 ++processed; /* So we know when to refresh */
4847
4848                 /* Initial frame - setup */
4849                 if (rxr->fmp == NULL) {
4850                         mh->m_pkthdr.len = mh->m_len;
4851                         /* Save the head of the chain */
4852                         rxr->fmp = mh;
4853                         rxr->lmp = mh;
4854                         if (mp != NULL) {
4855                                 /* Add payload if split */
4856                                 mh->m_pkthdr.len += mp->m_len;
4857                                 rxr->lmp = mh->m_next;
4858                         }
4859                 } else {
4860                         /* Chain mbuf's together */
4861                         rxr->lmp->m_next = mh;
4862                         rxr->lmp = rxr->lmp->m_next;
4863                         rxr->fmp->m_pkthdr.len += mh->m_len;
4864                 }
4865
4866                 if (eop) {
4867                         rxr->fmp->m_pkthdr.rcvif = ifp;
4868                         ifp->if_ipackets++;
4869                         rxr->rx_packets++;
4870                         /* capture data for AIM */
4871                         rxr->packets++;
4872                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4873                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4874
4875                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4876                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4877
4878                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4879                             (staterr & E1000_RXD_STAT_VP) != 0) {
4880                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4881                                 rxr->fmp->m_flags |= M_VLANTAG;
4882                         }
4883 #ifndef IGB_LEGACY_TX
4884                         rxr->fmp->m_pkthdr.flowid = que->msix;
4885                         rxr->fmp->m_flags |= M_FLOWID;
4886 #endif
4887                         sendmp = rxr->fmp;
4888                         /* Make sure to set M_PKTHDR. */
4889                         sendmp->m_flags |= M_PKTHDR;
4890                         rxr->fmp = NULL;
4891                         rxr->lmp = NULL;
4892                 }
4893
4894 next_desc:
4895                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4896                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4897
4898                 /* Advance our pointers to the next descriptor. */
4899                 if (++i == adapter->num_rx_desc)
4900                         i = 0;
4901                 /*
4902                 ** Send to the stack or LRO
4903                 */
4904                 if (sendmp != NULL) {
4905                         rxr->next_to_check = i;
4906                         igb_rx_input(rxr, ifp, sendmp, ptype);
4907                         i = rxr->next_to_check;
4908                         rxdone++;
4909                 }
4910
4911                 /* Every 8 descriptors we go to refresh mbufs */
4912                 if (processed == 8) {
4913                         igb_refresh_mbufs(rxr, i);
4914                         processed = 0;
4915                 }
4916         }
4917
4918         /* Catch any remainders */
4919         if (igb_rx_unrefreshed(rxr))
4920                 igb_refresh_mbufs(rxr, i);
4921
4922         rxr->next_to_check = i;
4923
4924         /*
4925          * Flush any outstanding LRO work
4926          */
4927         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4928                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4929                 tcp_lro_flush(lro, queued);
4930         }
4931
4932         if (done != NULL)
4933                 *done += rxdone;
4934
4935         IGB_RX_UNLOCK(rxr);
4936         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4937 }
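
/*
 * Usage sketch, assuming the adapter's rx_process_limit tunable: a
 * queue handler typically drains in bounded batches, e.g.
 *
 *	int done = 0;
 *
 *	while (igb_rxeof(que, adapter->rx_process_limit, &done))
 *		;
 *
 * where the limit bounds the work done per pass and 'done' accumulates
 * the number of packets delivered to the stack.
 */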
4938
4939 /*********************************************************************
4940  *
4941  *  Verify that the hardware indicated that the checksum is valid.
4942  *  Inform the stack of the checksum status so that it
4943  *  does not spend time re-verifying the checksum.
4944  *
4945  *********************************************************************/
4946 static void
4947 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4948 {
4949         u16 status = (u16)staterr;
4950         u8  errors = (u8) (staterr >> 24);
4951         int sctp;
4952
4953         /* Ignore Checksum bit is set */
4954         if (status & E1000_RXD_STAT_IXSM) {
4955                 mp->m_pkthdr.csum_flags = 0;
4956                 return;
4957         }
4958
4959         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4960             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4961                 sctp = 1;
4962         else
4963                 sctp = 0;
4964         if (status & E1000_RXD_STAT_IPCS) {
4965                 /* Did it pass? */
4966                 if (!(errors & E1000_RXD_ERR_IPE)) {
4967                         /* IP Checksum Good */
4968                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4969                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4970                 } else
4971                         mp->m_pkthdr.csum_flags = 0;
4972         }
4973
4974         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4975                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4976 #if __FreeBSD_version >= 800000
4977                 if (sctp) /* reassign */
4978                         type = CSUM_SCTP_VALID;
4979 #endif
4980                 /* Did it pass? */
4981                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4982                         mp->m_pkthdr.csum_flags |= type;
4983                         if (sctp == 0)
4984                                 mp->m_pkthdr.csum_data = htons(0xffff);
4985                 }
4986         }
4987         return;
4988 }
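
/*
 * A worked example of the mapping above: a good IPv4/TCP frame leaves
 * with csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID |
 * CSUM_PSEUDO_HDR and csum_data = 0xffff, telling the stack that both
 * the IP header and TCP checksums were already verified in hardware.
 */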
4989
4990 /*
4991  * This routine is run via a vlan
4992  * config EVENT.
4993  */
4994 static void
4995 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4996 {
4997         struct adapter  *adapter = ifp->if_softc;
4998         u32             index, bit;
4999
5000         if (ifp->if_softc !=  arg)   /* Not our event */
5001                 return;
5002
5003         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5004                 return;
5005
5006         IGB_CORE_LOCK(adapter);
5007         index = (vtag >> 5) & 0x7F;
5008         bit = vtag & 0x1F;
5009         adapter->shadow_vfta[index] |= (1 << bit);
5010         ++adapter->num_vlans;
5011         /* Change hw filter setting */
5012         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5013                 igb_setup_vlan_hw_support(adapter);
5014         IGB_CORE_UNLOCK(adapter);
5015 }
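
/*
 * A worked example of the shadow VFTA indexing above: for vtag 1000,
 * index = (1000 >> 5) & 0x7F = 31 and bit = 1000 & 0x1F = 8, so the
 * tag sets bit 8 of shadow_vfta[31]; the 128 32-bit words cover all
 * 4096 possible VLAN IDs.
 */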
5016
5017 /*
5018  * This routine is run via a vlan
5019  * unconfig EVENT.
5020  */
5021 static void
5022 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5023 {
5024         struct adapter  *adapter = ifp->if_softc;
5025         u32             index, bit;
5026
5027         if (ifp->if_softc !=  arg)
5028                 return;
5029
5030         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5031                 return;
5032
5033         IGB_CORE_LOCK(adapter);
5034         index = (vtag >> 5) & 0x7F;
5035         bit = vtag & 0x1F;
5036         adapter->shadow_vfta[index] &= ~(1 << bit);
5037         --adapter->num_vlans;
5038         /* Change hw filter setting */
5039         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5040                 igb_setup_vlan_hw_support(adapter);
5041         IGB_CORE_UNLOCK(adapter);
5042 }
5043
5044 static void
5045 igb_setup_vlan_hw_support(struct adapter *adapter)
5046 {
5047         struct e1000_hw *hw = &adapter->hw;
5048         struct ifnet    *ifp = adapter->ifp;
5049         u32             reg;
5050
5051         if (adapter->vf_ifp) {
5052                 e1000_rlpml_set_vf(hw,
5053                     adapter->max_frame_size + VLAN_TAG_SIZE);
5054                 return;
5055         }
5056
5057         reg = E1000_READ_REG(hw, E1000_CTRL);
5058         reg |= E1000_CTRL_VME;
5059         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5060
5061         /* Enable the Filter Table */
5062         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5063                 reg = E1000_READ_REG(hw, E1000_RCTL);
5064                 reg &= ~E1000_RCTL_CFIEN;
5065                 reg |= E1000_RCTL_VFE;
5066                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5067         }
5068
5069         /* Update the frame size */
5070         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5071             adapter->max_frame_size + VLAN_TAG_SIZE);
5072
5073         /* Don't bother with table if no vlans */
5074         if ((adapter->num_vlans == 0) ||
5075             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5076                 return;
5077         /*
5078         ** A soft reset zeroes out the VFTA, so
5079         ** we need to repopulate it now.
5080         */
5081         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5082                 if (adapter->shadow_vfta[i] != 0) {
5083                         if (adapter->vf_ifp)
5084                                 e1000_vfta_set_vf(hw,
5085                                     adapter->shadow_vfta[i], TRUE);
5086                         else
5087                                 e1000_write_vfta(hw,
5088                                     i, adapter->shadow_vfta[i]);
5089                 }
5090 }
5091
5092 static void
5093 igb_enable_intr(struct adapter *adapter)
5094 {
5095         /* With RSS set up what to auto clear */
5096         if (adapter->msix_mem) {
5097                 u32 mask = (adapter->que_mask | adapter->link_mask);
5098                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5099                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5100                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5101                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5102                     E1000_IMS_LSC);
5103         } else {
5104                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5105                     IMS_ENABLE_MASK);
5106         }
5107         E1000_WRITE_FLUSH(&adapter->hw);
5108
5109         return;
5110 }
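
/*
 * For reference (MSI-X path above, roughly): EIMS unmasks the queue
 * and link vectors, EIAC marks which of them auto-clear when the
 * interrupt fires, and EIAM enables auto-masking of the same bits, so
 * per-queue handlers can run without an explicit acknowledge; link
 * state changes still arrive via the legacy IMS_LSC cause.
 */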
5111
5112 static void
5113 igb_disable_intr(struct adapter *adapter)
5114 {
5115         if (adapter->msix_mem) {
5116                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5117                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5118         } 
5119         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5120         E1000_WRITE_FLUSH(&adapter->hw);
5121         return;
5122 }
5123
5124 /*
5125  * Bit of a misnomer: what this really means is
5126  * to enable OS management of the system, i.e.
5127  * to disable certain hardware management features.
5128  */
5129 static void
5130 igb_init_manageability(struct adapter *adapter)
5131 {
5132         if (adapter->has_manage) {
5133                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5134                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5135
5136                 /* disable hardware interception of ARP */
5137                 manc &= ~(E1000_MANC_ARP_EN);
5138
5139                 /* enable receiving management packets to the host */
5140                 manc |= E1000_MANC_EN_MNG2HOST;
5141                 manc2h |= 1 << 5;  /* Mng Port 623 */
5142                 manc2h |= 1 << 6;  /* Mng Port 664 */
5143                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5144                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5145         }
5146 }
5147
5148 /*
5149  * Give control back to hardware management
5150  * controller if there is one.
5151  */
5152 static void
5153 igb_release_manageability(struct adapter *adapter)
5154 {
5155         if (adapter->has_manage) {
5156                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5157
5158                 /* re-enable hardware interception of ARP */
5159                 manc |= E1000_MANC_ARP_EN;
5160                 manc &= ~E1000_MANC_EN_MNG2HOST;
5161
5162                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5163         }
5164 }
5165
5166 /*
5167  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5168  * For ASF and Pass Through versions of f/w this means that
5169  * the driver is loaded. 
5170  *
5171  */
5172 static void
5173 igb_get_hw_control(struct adapter *adapter)
5174 {
5175         u32 ctrl_ext;
5176
5177         if (adapter->vf_ifp)
5178                 return;
5179
5180         /* Let firmware know the driver has taken over */
5181         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5182         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5183             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5184 }
5185
5186 /*
5187  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5188  * For ASF and Pass Through versions of f/w this means that the
5189  * driver is no longer loaded.
5190  *
5191  */
5192 static void
5193 igb_release_hw_control(struct adapter *adapter)
5194 {
5195         u32 ctrl_ext;
5196
5197         if (adapter->vf_ifp)
5198                 return;
5199
5200         /* Let firmware take over control of h/w */
5201         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5202         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5203             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5204 }
5205
5206 static int
5207 igb_is_valid_ether_addr(uint8_t *addr)
5208 {
5209         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5210
5211         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5212                 return (FALSE);
5213         }
5214
5215         return (TRUE);
5216 }
5217
5218
5219 /*
5220  * Enable PCI Wake On Lan capability
5221  */
5222 static void
5223 igb_enable_wakeup(device_t dev)
5224 {
5225         u16     cap, status;
5226         u8      id;
5227
5228         /* First find the capabilities pointer */
5229         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5230         /* Read the PM Capabilities */
5231         id = pci_read_config(dev, cap, 1);
5232         if (id != PCIY_PMG)     /* Something wrong */
5233                 return;
5234         /* OK, we have the power capabilities, so
5235            now get the status register */
5236         cap += PCIR_POWER_STATUS;
5237         status = pci_read_config(dev, cap, 2);
5238         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5239         pci_write_config(dev, cap, status, 2);
5240         return;
5241 }
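
/*
 * A more defensive variant, as a sketch only (igb_enable_wakeup_sketch
 * is hypothetical and unused): walk the capability list with
 * pci_find_cap(9) rather than assuming the power-management capability
 * sits first in the chain.
 */
static __inline void
igb_enable_wakeup_sketch(device_t dev)
{
        u16     status;
        int     pmc;

        /* Locate the PCI power management capability, if present. */
        if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
                return;
        /* Arm PME via the capability's status register. */
        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
        status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
}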
5242
5243 static void
5244 igb_led_func(void *arg, int onoff)
5245 {
5246         struct adapter  *adapter = arg;
5247
5248         IGB_CORE_LOCK(adapter);
5249         if (onoff) {
5250                 e1000_setup_led(&adapter->hw);
5251                 e1000_led_on(&adapter->hw);
5252         } else {
5253                 e1000_led_off(&adapter->hw);
5254                 e1000_cleanup_led(&adapter->hw);
5255         }
5256         IGB_CORE_UNLOCK(adapter);
5257 }
5258
5259 /**********************************************************************
5260  *
5261  *  Update the board statistics counters.
5262  *
5263  **********************************************************************/
5264 static void
5265 igb_update_stats_counters(struct adapter *adapter)
5266 {
5267         struct ifnet            *ifp;
5268         struct e1000_hw         *hw = &adapter->hw;
5269         struct e1000_hw_stats   *stats;
5270
5271         /* 
5272         ** The virtual function adapter has only a
5273         ** small controlled set of stats, so update
5274         ** only those and return.
5275         */
5276         if (adapter->vf_ifp) {
5277                 igb_update_vf_stats_counters(adapter);
5278                 return;
5279         }
5280
5281         stats = (struct e1000_hw_stats  *)adapter->stats;
5282
5283         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5284            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5285                 stats->symerrs +=
5286                     E1000_READ_REG(hw, E1000_SYMERRS);
5287                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5288         }
5289
5290         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5291         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5292         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5293         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5294
5295         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5296         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5297         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5298         stats->dc += E1000_READ_REG(hw, E1000_DC);
5299         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5300         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5301         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5302         /*
5303         ** For watchdog management we need to know if we have been
5304         ** paused during the last interval, so capture that here.
5305         */ 
5306         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5307         stats->xoffrxc += adapter->pause_frames;
5308         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5309         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5310         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5311         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5312         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5313         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5314         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5315         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5316         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5317         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5318         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5319         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5320
5321         /* For the 64-bit byte counters the low dword must be read first. */
5322         /* Both registers clear on the read of the high dword */
5323
5324         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5325             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5326         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5327             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5328
5329         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5330         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5331         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5332         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5333         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5334
5335         stats->tor += E1000_READ_REG(hw, E1000_TORH);
5336         stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5337
5338         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5339         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5340         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5341         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5342         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5343         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5344         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5345         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5346         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5347         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5348
5349         /* Interrupt Counts */
5350
5351         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5352         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5353         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5354         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5355         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5356         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5357         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5358         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5359         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5360
5361         /* Host to Card Statistics */
5362
5363         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5364         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5365         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5366         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5367         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5368         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5369         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5370         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5371             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5372         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5373             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5374         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5375         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5376         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5377
5378         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5379         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5380         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5381         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5382         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5383         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5384
5385         ifp = adapter->ifp;
5386         ifp->if_collisions = stats->colc;
5387
5388         /* Rx Errors */
5389         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5390             stats->crcerrs + stats->algnerrc +
5391             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5392
5393         /* Tx Errors */
5394         ifp->if_oerrors = stats->ecol +
5395             stats->latecol + adapter->watchdog_events;
5396
5397         /* Driver specific counters */
5398         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5399         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5400         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5401         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5402         adapter->packet_buf_alloc_tx =
5403             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5404         adapter->packet_buf_alloc_rx =
5405             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5406 }
5407
5408
5409 /**********************************************************************
5410  *
5411  *  Initialize the VF board statistics counters.
5412  *
5413  **********************************************************************/
5414 static void
5415 igb_vf_init_stats(struct adapter *adapter)
5416 {
5417         struct e1000_hw *hw = &adapter->hw;
5418         struct e1000_vf_stats   *stats;
5419
5420         stats = (struct e1000_vf_stats  *)adapter->stats;
5421         if (stats == NULL)
5422                 return;
5423         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5424         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5425         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5426         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5427         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5428 }
5429  
5430 /**********************************************************************
5431  *
5432  *  Update the VF board statistics counters.
5433  *
5434  **********************************************************************/
5435 static void
5436 igb_update_vf_stats_counters(struct adapter *adapter)
5437 {
5438         struct e1000_hw *hw = &adapter->hw;
5439         struct e1000_vf_stats   *stats;
5440
5441         if (adapter->link_speed == 0)
5442                 return;
5443
5444         stats = (struct e1000_vf_stats  *)adapter->stats;
5445
5446         UPDATE_VF_REG(E1000_VFGPRC,
5447             stats->last_gprc, stats->gprc);
5448         UPDATE_VF_REG(E1000_VFGORC,
5449             stats->last_gorc, stats->gorc);
5450         UPDATE_VF_REG(E1000_VFGPTC,
5451             stats->last_gptc, stats->gptc);
5452         UPDATE_VF_REG(E1000_VFGOTC,
5453             stats->last_gotc, stats->gotc);
5454         UPDATE_VF_REG(E1000_VFMPRC,
5455             stats->last_mprc, stats->mprc);
5456 }
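
/*
 * A minimal sketch of what UPDATE_VF_REG is assumed to do (the real
 * macro lives in if_igb.h): the VF counters are 32-bit, non-clearing
 * registers, so each update folds the wrapped delta since the last
 * sample into a wider running count, e.g.
 *
 *	cur = E1000_READ_REG(hw, reg);
 *	count += (u32)(cur - last);	(a modulo-2^32 delta)
 *	last = cur;
 */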
5457
5458 /* Export a single 32-bit register via a read-only sysctl. */
5459 static int
5460 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5461 {
5462         struct adapter *adapter;
5463         u_int val;
5464
5465         adapter = oidp->oid_arg1;
5466         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5467         return (sysctl_handle_int(oidp, &val, 0, req));
5468 }
5469
5470 /*
5471 **  Tunable interrupt rate handler
5472 */
5473 static int
5474 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5475 {
5476         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5477         int                     error;
5478         u32                     reg, usec, rate;
5479                         
5480         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5481         usec = ((reg & 0x7FFC) >> 2);
5482         if (usec > 0)
5483                 rate = 1000000 / usec;
5484         else
5485                 rate = 0;
5486         error = sysctl_handle_int(oidp, &rate, 0, req);
5487         if (error || !req->newptr)
5488                 return (error);
5489         return (0);
5490 }
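
/*
 * A worked example of the decode above: with reg == 0x1F4, usec =
 * (0x1F4 & 0x7FFC) >> 2 = 125, giving rate = 1000000 / 125 = 8000
 * interrupts per second; a zero interval reports a rate of 0.
 */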
5491
5492 /*
5493  * Add sysctl variables, one per statistic, to the system.
5494  */
5495 static void
5496 igb_add_hw_stats(struct adapter *adapter)
5497 {
5498         device_t dev = adapter->dev;
5499
5500         struct tx_ring *txr = adapter->tx_rings;
5501         struct rx_ring *rxr = adapter->rx_rings;
5502
5503         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5504         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5505         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5506         struct e1000_hw_stats *stats = adapter->stats;
5507
5508         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5509         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5510
5511 #define QUEUE_NAME_LEN 32
5512         char namebuf[QUEUE_NAME_LEN];
5513
5514         /* Driver Statistics */
5515         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5516                         CTLFLAG_RD, &adapter->link_irq, 0,
5517                         "Link MSIX IRQ Handled");
5518         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5519                         CTLFLAG_RD, &adapter->dropped_pkts,
5520                         "Driver dropped packets");
5521         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5522                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5523                         "Driver tx dma failure in xmit");
5524         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5525                         CTLFLAG_RD, &adapter->rx_overruns,
5526                         "RX overruns");
5527         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5528                         CTLFLAG_RD, &adapter->watchdog_events,
5529                         "Watchdog timeouts");
5530
5531         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5532                         CTLFLAG_RD, &adapter->device_control,
5533                         "Device Control Register");
5534         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5535                         CTLFLAG_RD, &adapter->rx_control,
5536                         "Receiver Control Register");
5537         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5538                         CTLFLAG_RD, &adapter->int_mask,
5539                         "Interrupt Mask");
5540         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5541                         CTLFLAG_RD, &adapter->eint_mask,
5542                         "Extended Interrupt Mask");
5543         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5544                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5545                         "Transmit Buffer Packet Allocation");
5546         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5547                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5548                         "Receive Buffer Packet Allocation");
5549         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5550                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5551                         "Flow Control High Watermark");
5552         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5553                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5554                         "Flow Control Low Watermark");
5555
5556         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5557                 struct lro_ctrl *lro = &rxr->lro;
5558
5559                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5560                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5561                                             CTLFLAG_RD, NULL, "Queue Name");
5562                 queue_list = SYSCTL_CHILDREN(queue_node);
5563
5564                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5565                                 CTLFLAG_RD, &adapter->queues[i],
5566                                 sizeof(&adapter->queues[i]),
5567                                 igb_sysctl_interrupt_rate_handler,
5568                                 "IU", "Interrupt Rate");
5569
5570                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5571                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5572                                 igb_sysctl_reg_handler, "IU",
5573                                 "Transmit Descriptor Head");
5574                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5575                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5576                                 igb_sysctl_reg_handler, "IU",
5577                                 "Transmit Descriptor Tail");
5578                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5579                                 CTLFLAG_RD, &txr->no_desc_avail,
5580                                 "Queue No Descriptor Available");
5581                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5582                                 CTLFLAG_RD, &txr->tx_packets,
5583                                 "Queue Packets Transmitted");
5584
5585                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5586                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5587                                 igb_sysctl_reg_handler, "IU",
5588                                 "Receive Descriptor Head");
5589                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5590                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5591                                 igb_sysctl_reg_handler, "IU",
5592                                 "Receive Descriptor Tail");
5593                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5594                                 CTLFLAG_RD, &rxr->rx_packets,
5595                                 "Queue Packets Received");
5596                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5597                                 CTLFLAG_RD, &rxr->rx_bytes,
5598                                 "Queue Bytes Received");
5599                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5600                                 CTLFLAG_RD, &lro->lro_queued, 0,
5601                                 "LRO Queued");
5602                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5603                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5604                                 "LRO Flushed");
5605         }
5606
5607         /* MAC stats get their own sub node */
5608
5609         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5610                                     CTLFLAG_RD, NULL, "MAC Statistics");
5611         stat_list = SYSCTL_CHILDREN(stat_node);
5612
5613         /*
5614         ** The VF adapter has a very limited set of stats
5615         ** since it's not managing the metal, so to speak.
5616         */
5617         if (adapter->vf_ifp) {
5618                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5619                                 CTLFLAG_RD, &stats->gprc,
5620                                 "Good Packets Received");
5621                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5622                                 CTLFLAG_RD, &stats->gptc,
5623                                 "Good Packets Transmitted");
5624                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5625                                 CTLFLAG_RD, &stats->gorc,
5626                                 "Good Octets Received");
5627                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5628                                 CTLFLAG_RD, &stats->gotc,
5629                                 "Good Octets Transmitted");
5630                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5631                                 CTLFLAG_RD, &stats->mprc,
5632                                 "Multicast Packets Received");
5633                 return;
5634         }
5635
5636         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5637                         CTLFLAG_RD, &stats->ecol,
5638                         "Excessive collisions");
5639         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5640                         CTLFLAG_RD, &stats->scc,
5641                         "Single collisions");
5642         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5643                         CTLFLAG_RD, &stats->mcc,
5644                         "Multiple collisions");
5645         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5646                         CTLFLAG_RD, &stats->latecol,
5647                         "Late collisions");
5648         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5649                         CTLFLAG_RD, &stats->colc,
5650                         "Collision Count");
5651         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5652                         CTLFLAG_RD, &stats->symerrs,
5653                         "Symbol Errors");
5654         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5655                         CTLFLAG_RD, &stats->sec,
5656                         "Sequence Errors");
5657         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5658                         CTLFLAG_RD, &stats->dc,
5659                         "Defer Count");
5660         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5661                         CTLFLAG_RD, &stats->mpc,
5662                         "Missed Packets");
5663         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5664                         CTLFLAG_RD, &stats->rnbc,
5665                         "Receive No Buffers");
5666         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5667                         CTLFLAG_RD, &stats->ruc,
5668                         "Receive Undersize");
5669         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5670                         CTLFLAG_RD, &stats->rfc,
5671                         "Fragmented Packets Received ");
5672         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5673                         CTLFLAG_RD, &stats->roc,
5674                         "Oversized Packets Received");
5675         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5676                         CTLFLAG_RD, &stats->rjc,
5677                         "Recevied Jabber");
5678         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5679                         CTLFLAG_RD, &stats->rxerrc,
5680                         "Receive Errors");
5681         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5682                         CTLFLAG_RD, &stats->crcerrs,
5683                         "CRC errors");
5684         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5685                         CTLFLAG_RD, &stats->algnerrc,
5686                         "Alignment Errors");
5687         /* On 82575 these are collision counts */
5688         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5689                         CTLFLAG_RD, &stats->cexterr,
5690                         "Collision/Carrier extension errors");
5691         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5692                         CTLFLAG_RD, &stats->xonrxc,
5693                         "XON Received");
5694         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5695                         CTLFLAG_RD, &stats->xontxc,
5696                         "XON Transmitted");
5697         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5698                         CTLFLAG_RD, &stats->xoffrxc,
5699                         "XOFF Received");
5700         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5701                         CTLFLAG_RD, &stats->xofftxc,
5702                         "XOFF Transmitted");
5703         /* Packet Reception Stats */
5704         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5705                         CTLFLAG_RD, &stats->tpr,
5706                         "Total Packets Received ");
5707         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5708                         CTLFLAG_RD, &stats->gprc,
5709                         "Good Packets Received");
5710         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5711                         CTLFLAG_RD, &stats->bprc,
5712                         "Broadcast Packets Received");
5713         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5714                         CTLFLAG_RD, &stats->mprc,
5715                         "Multicast Packets Received");
5716         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5717                         CTLFLAG_RD, &stats->prc64,
5718                         "64 byte frames received ");
5719         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5720                         CTLFLAG_RD, &stats->prc127,
5721                         "65-127 byte frames received");
5722         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5723                         CTLFLAG_RD, &stats->prc255,
5724                         "128-255 byte frames received");
5725         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5726                         CTLFLAG_RD, &stats->prc511,
5727                         "256-511 byte frames received");
5728         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5729                         CTLFLAG_RD, &stats->prc1023,
5730                         "512-1023 byte frames received");
5731         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5732                         CTLFLAG_RD, &stats->prc1522,
5733                         "1023-1522 byte frames received");
5734         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5735                         CTLFLAG_RD, &stats->gorc, 
5736                         "Good Octets Received"); 
5737
5738         /* Packet Transmission Stats */
5739         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5740                         CTLFLAG_RD, &stats->gotc, 
5741                         "Good Octets Transmitted"); 
5742         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5743                         CTLFLAG_RD, &stats->tpt,
5744                         "Total Packets Transmitted");
5745         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5746                         CTLFLAG_RD, &stats->gptc,
5747                         "Good Packets Transmitted");
5748         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5749                         CTLFLAG_RD, &stats->bptc,
5750                         "Broadcast Packets Transmitted");
5751         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5752                         CTLFLAG_RD, &stats->mptc,
5753                         "Multicast Packets Transmitted");
5754         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5755                         CTLFLAG_RD, &stats->ptc64,
5756                         "64 byte frames transmitted ");
5757         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5758                         CTLFLAG_RD, &stats->ptc127,
5759                         "65-127 byte frames transmitted");
5760         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5761                         CTLFLAG_RD, &stats->ptc255,
5762                         "128-255 byte frames transmitted");
5763         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5764                         CTLFLAG_RD, &stats->ptc511,
5765                         "256-511 byte frames transmitted");
5766         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5767                         CTLFLAG_RD, &stats->ptc1023,
5768                         "512-1023 byte frames transmitted");
5769         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5770                         CTLFLAG_RD, &stats->ptc1522,
5771                         "1024-1522 byte frames transmitted");
5772         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5773                         CTLFLAG_RD, &stats->tsctc,
5774                         "TSO Contexts Transmitted");
5775         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5776                         CTLFLAG_RD, &stats->tsctfc,
5777                         "TSO Contexts Failed");
5778
5779
5780         /* Interrupt Stats */
5781
5782         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5783                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5784         int_list = SYSCTL_CHILDREN(int_node);
5785
5786         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5787                         CTLFLAG_RD, &stats->iac,
5788                         "Interrupt Assertion Count");
5789
5790         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5791                         CTLFLAG_RD, &stats->icrxptc,
5792                         "Interrupt Cause Rx Pkt Timer Expire Count");
5793
5794         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5795                         CTLFLAG_RD, &stats->icrxatc,
5796                         "Interrupt Cause Rx Abs Timer Expire Count");
5797
5798         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5799                         CTLFLAG_RD, &stats->ictxptc,
5800                         "Interrupt Cause Tx Pkt Timer Expire Count");
5801
5802         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5803                         CTLFLAG_RD, &stats->ictxatc,
5804                         "Interrupt Cause Tx Abs Timer Expire Count");
5805
5806         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5807                         CTLFLAG_RD, &stats->ictxqec,
5808                         "Interrupt Cause Tx Queue Empty Count");
5809
5810         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5811                         CTLFLAG_RD, &stats->ictxqmtc,
5812                         "Interrupt Cause Tx Queue Min Thresh Count");
5813
5814         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5815                         CTLFLAG_RD, &stats->icrxdmtc,
5816                         "Interrupt Cause Rx Desc Min Thresh Count");
5817
5818         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5819                         CTLFLAG_RD, &stats->icrxoc,
5820                         "Interrupt Cause Receiver Overrun Count");
5821
5822         /* Host to Card Stats */
5823
5824         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5825                                     CTLFLAG_RD, NULL, 
5826                                     "Host to Card Statistics");
5827
5828         host_list = SYSCTL_CHILDREN(host_node);
5829
5830         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5831                         CTLFLAG_RD, &stats->cbtmpc,
5832                         "Circuit Breaker Tx Packet Count");
5833
5834         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5835                         CTLFLAG_RD, &stats->htdpmc,
5836                         "Host Transmit Discarded Packets");
5837
5838         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5839                         CTLFLAG_RD, &stats->rpthc,
5840                         "Rx Packets To Host");
5841
5842         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5843                         CTLFLAG_RD, &stats->cbrmpc,
5844                         "Circuit Breaker Rx Packet Count");
5845
5846         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5847                         CTLFLAG_RD, &stats->cbrdpc,
5848                         "Circuit Breaker Rx Dropped Count");
5849
5850         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5851                         CTLFLAG_RD, &stats->hgptc,
5852                         "Host Good Packets Tx Count");
5853
5854         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5855                         CTLFLAG_RD, &stats->htcbdpc,
5856                         "Host Tx Circuit Breaker Dropped Count");
5857
5858         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5859                         CTLFLAG_RD, &stats->hgorc,
5860                         "Host Good Octets Received Count");
5861
5862         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5863                         CTLFLAG_RD, &stats->hgotc,
5864                         "Host Good Octets Transmit Count");
5865
5866         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5867                         CTLFLAG_RD, &stats->lenerrs,
5868                         "Length Errors");
5869
5870         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5871                         CTLFLAG_RD, &stats->scvpc,
5872                         "SerDes/SGMII Code Violation Pkt Count");
5873
5874         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5875                         CTLFLAG_RD, &stats->hrmpc,
5876                         "Header Redirection Missed Packet Count");
5877 }
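
/*
 * The tree built above hangs off the device's sysctl node; with the
 * first igb instance the resulting OIDs look like (paths illustrative):
 *
 *      dev.igb.0.dropped
 *      dev.igb.0.queue0.interrupt_rate
 *      dev.igb.0.mac_stats.good_pkts_recvd
 *      dev.igb.0.interrupts.asserts
 *      dev.igb.0.host.rx_pkt
 */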
5878
5879
5880 /**********************************************************************
5881  *
5882  *  This routine provides a way to dump out the adapter EEPROM,
5883  *  often a useful debug/service tool. Only the first 32 words
5884  *  are dumped; the data that matters lives in that range.
5885  *
5886  **********************************************************************/
5887 static int
5888 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5889 {
5890         struct adapter *adapter;
5891         int error;
5892         int result;
5893
5894         result = -1;
5895         error = sysctl_handle_int(oidp, &result, 0, req);
5896
5897         if (error || !req->newptr)
5898                 return (error);
5899
5900         /*
5901          * This value will cause a hex dump of the
5902          * first 32 16-bit words of the EEPROM to
5903          * the screen.
5904          */
5905         if (result == 1) {
5906                 adapter = (struct adapter *)arg1;
5907                 igb_print_nvm_info(adapter);
5908         }
5909
5910         return (error);
5911 }
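
/*
 * Example, assuming this handler is attached as an "nvm" node (name
 * illustrative): trigger the dump with
 *
 *      sysctl dev.igb.0.nvm=1
 */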
5912
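/*
 * Output layout note: each row below holds 8 words (16 bytes), so the
 * row labels advance by 0x10 bytes: 0x0000, 0x0010, 0x0020, 0x0030
 * for the 32 words dumped.
 */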
5913 static void
5914 igb_print_nvm_info(struct adapter *adapter)
5915 {
5916         u16     eeprom_data;
5917         int     i, j, row = 0;
5918
5919         /* It's a bit crude, but it gets the job done */
5920         printf("\nInterface EEPROM Dump:\n");
5921         printf("Offset\n0x0000  ");
5922         for (i = 0, j = 0; i < 32; i++, j++) {
5923                 if (j == 8) { /* Make the offset block */
5924                         j = 0; ++row;
5925                         printf("\n0x00%x0  ",row);
5926                 }
5927                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5928                 printf("%04x ", eeprom_data);
5929         }
5930         printf("\n");
5931 }
5932
5933 static void
5934 igb_set_sysctl_value(struct adapter *adapter, const char *name,
5935         const char *description, int *limit, int value)
5936 {
5937         *limit = value;
5938         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5939             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5940             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5941 }
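
/*
 * Typical use at attach time (a sketch; the tunable name, description
 * and default shown here are illustrative):
 *
 *      igb_set_sysctl_value(adapter, "rx_processing_limit",
 *          "max number of rx packets to process",
 *          &adapter->rx_process_limit, 100);
 */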
5942
5943 /*
5944 ** Set flow control using sysctl:
5945 ** Flow control values:
5946 **      0 - off
5947 **      1 - rx pause
5948 **      2 - tx pause
5949 **      3 - full
5950 */
5951 static int
5952 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5953 {
5954         int             error;
5955         static int      input = 3; /* default is full */
5956         struct adapter  *adapter = (struct adapter *) arg1;
5957
5958         error = sysctl_handle_int(oidp, &input, 0, req);
5959
5960         if ((error) || (req->newptr == NULL))
5961                 return (error);
5962
5963         switch (input) {
5964                 case e1000_fc_rx_pause:
5965                 case e1000_fc_tx_pause:
5966                 case e1000_fc_full:
5967                 case e1000_fc_none:
5968                         adapter->hw.fc.requested_mode = input;
5969                         adapter->fc = input;
5970                         break;
5971                 default:
5972                         /* Do nothing */
5973                         return (error);
5974         }
5975
5976         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5977         e1000_force_mac_fc(&adapter->hw);
5978         return (error);
5979 }
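
/*
 * From userland, assuming this handler is attached as an "fc" node
 * (name illustrative), full flow control would be requested with:
 *
 *      sysctl dev.igb.0.fc=3
 */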
5980
5981 /*
5982 ** Manage DMA Coalesce:
5983 ** Control values:
5984 **      0/1 - off/on
5985 **      Legal timer values are:
5986 **      250, 500, and 1000-10000 in increments of 1000
5987 */
5988 static int
5989 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5990 {
5991         struct adapter *adapter = (struct adapter *) arg1;
5992         int             error;
5993
5994         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5995
5996         if ((error) || (req->newptr == NULL))
5997                 return (error);
5998
5999         switch (adapter->dmac) {
6000                 case 0:
6001                         /* Disabling */
6002                         break;
6003                 case 1: /* Just enable and use default */
6004                         adapter->dmac = 1000;
6005                         break;
6006                 case 250:
6007                 case 500:
6008                 case 1000:
6009                 case 2000:
6010                 case 3000:
6011                 case 4000:
6012                 case 5000:
6013                 case 6000:
6014                 case 7000:
6015                 case 8000:
6016                 case 9000:
6017                 case 10000:
6018                         /* Legal values - allow */
6019                         break;
6020                 default:
6021                         /* Illegal value, force coalescing off */
6022                         adapter->dmac = 0;
6023                         return (error);
6024         }
6025         /* Reinit the interface */
6026         igb_init(adapter);
6027         return (error);
6028 }
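
/*
 * Example, assuming this handler is attached as a "dmac" node (name
 * illustrative): enable coalescing with the default 1000 timer value,
 * or select an explicit legal value:
 *
 *      sysctl dev.igb.0.dmac=1
 *      sysctl dev.igb.0.dmac=4000
 */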
6029
6030 /*
6031 ** Manage Energy Efficient Ethernet:
6032 ** Control values:
6033 **     0/1 - enabled/disabled
6034 */
6035 static int
6036 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6037 {
6038         struct adapter  *adapter = (struct adapter *) arg1;
6039         int             error, value;
6040
6041         value = adapter->hw.dev_spec._82575.eee_disable;
6042         error = sysctl_handle_int(oidp, &value, 0, req);
6043         if (error || req->newptr == NULL)
6044                 return (error);
6045         IGB_CORE_LOCK(adapter);
6046         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6047         igb_init_locked(adapter);
6048         IGB_CORE_UNLOCK(adapter);
6049         return (0);
6050 }
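
/*
 * Example, assuming this handler is attached as an "eee_disabled"-style
 * node (name illustrative): writing 1 disables EEE and reinitializes
 * the interface under the core lock:
 *
 *      sysctl dev.igb.0.eee_disabled=1
 */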