FreeBSD stable/8: sys/dev/e1000/if_igb.c (MFC of e1000 changes)
1 /******************************************************************************
2
3   Copyright (c) 2001-2010, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifdef HAVE_KERNEL_OPTION_HEADERS
37 #include "opt_device_polling.h"
38 #include "opt_inet.h"
39 #include "opt_altq.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
46 #endif
47 #include <sys/bus.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rman.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
60 #include <sys/pcpu.h>
61 #include <sys/smp.h>
62 #include <machine/smp.h>
63 #include <machine/bus.h>
64 #include <machine/resource.h>
65
66 #include <net/bpf.h>
67 #include <net/ethernet.h>
68 #include <net/if.h>
69 #include <net/if_arp.h>
70 #include <net/if_dl.h>
71 #include <net/if_media.h>
72
73 #include <net/if_types.h>
74 #include <net/if_vlan_var.h>
75
76 #include <netinet/in_systm.h>
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip6.h>
81 #include <netinet/tcp.h>
82 #include <netinet/tcp_lro.h>
83 #include <netinet/udp.h>
84
85 #include <machine/in_cksum.h>
86 #include <dev/led/led.h>
87 #include <dev/pci/pcivar.h>
88 #include <dev/pci/pcireg.h>
89
90 #include "e1000_api.h"
91 #include "e1000_82575.h"
92 #include "if_igb.h"
93
94 /*********************************************************************
95  *  Set this to one to display debug statistics
96  *********************************************************************/
97 int     igb_display_debug_stats = 0;
98
99 /*********************************************************************
100  *  Driver version:
101  *********************************************************************/
102 char igb_driver_version[] = "version - 2.0.1";
103
104
105 /*********************************************************************
106  *  PCI Device ID Table
107  *
108  *  Used by probe to select devices to load on
109  *  Last field stores an index into e1000_strings
110  *  Last entry must be all 0s
111  *
112  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113  *********************************************************************/
114
115 static igb_vendor_info_t igb_vendor_info_array[] =
116 {
117         { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
132         { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
137                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
138         /* required last entry */
139         { 0, 0, 0, 0, 0}
140 };
141
142 /*********************************************************************
143  *  Table of branding strings for all supported NICs.
144  *********************************************************************/
145
146 static char *igb_strings[] = {
147         "Intel(R) PRO/1000 Network Connection"
148 };
149
150 /*********************************************************************
151  *  Function prototypes
152  *********************************************************************/
153 static int      igb_probe(device_t);
154 static int      igb_attach(device_t);
155 static int      igb_detach(device_t);
156 static int      igb_shutdown(device_t);
157 static int      igb_suspend(device_t);
158 static int      igb_resume(device_t);
159 static void     igb_start(struct ifnet *);
160 static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
161 #if __FreeBSD_version >= 800000
162 static int      igb_mq_start(struct ifnet *, struct mbuf *);
163 static int      igb_mq_start_locked(struct ifnet *,
164                     struct tx_ring *, struct mbuf *);
165 static void     igb_qflush(struct ifnet *);
166 #endif
167 static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
168 static void     igb_init(void *);
169 static void     igb_init_locked(struct adapter *);
170 static void     igb_stop(void *);
171 static void     igb_media_status(struct ifnet *, struct ifmediareq *);
172 static int      igb_media_change(struct ifnet *);
173 static void     igb_identify_hardware(struct adapter *);
174 static int      igb_allocate_pci_resources(struct adapter *);
175 static int      igb_allocate_msix(struct adapter *);
176 static int      igb_allocate_legacy(struct adapter *);
177 static int      igb_setup_msix(struct adapter *);
178 static void     igb_free_pci_resources(struct adapter *);
179 static void     igb_local_timer(void *);
180 static void     igb_reset(struct adapter *);
181 static void     igb_setup_interface(device_t, struct adapter *);
182 static int      igb_allocate_queues(struct adapter *);
183 static void     igb_configure_queues(struct adapter *);
184
185 static int      igb_allocate_transmit_buffers(struct tx_ring *);
186 static void     igb_setup_transmit_structures(struct adapter *);
187 static void     igb_setup_transmit_ring(struct tx_ring *);
188 static void     igb_initialize_transmit_units(struct adapter *);
189 static void     igb_free_transmit_structures(struct adapter *);
190 static void     igb_free_transmit_buffers(struct tx_ring *);
191
192 static int      igb_allocate_receive_buffers(struct rx_ring *);
193 static int      igb_setup_receive_structures(struct adapter *);
194 static int      igb_setup_receive_ring(struct rx_ring *);
195 static void     igb_initialize_receive_units(struct adapter *);
196 static void     igb_free_receive_structures(struct adapter *);
197 static void     igb_free_receive_buffers(struct rx_ring *);
198 static void     igb_free_receive_ring(struct rx_ring *);
199
200 static void     igb_enable_intr(struct adapter *);
201 static void     igb_disable_intr(struct adapter *);
202 static void     igb_update_stats_counters(struct adapter *);
203 static bool     igb_txeof(struct tx_ring *);
204
205 static __inline void igb_rx_discard(struct rx_ring *, int);
206 static __inline void igb_rx_input(struct rx_ring *,
207                     struct ifnet *, struct mbuf *, u32);
208
209 static bool     igb_rxeof(struct igb_queue *, int, int *);
210 static void     igb_rx_checksum(u32, struct mbuf *, u32);
211 static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
212 static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
213 static void     igb_set_promisc(struct adapter *);
214 static void     igb_disable_promisc(struct adapter *);
215 static void     igb_set_multi(struct adapter *);
216 static void     igb_update_link_status(struct adapter *);
217 static void     igb_refresh_mbufs(struct rx_ring *, int);
218
219 static void     igb_register_vlan(void *, struct ifnet *, u16);
220 static void     igb_unregister_vlan(void *, struct ifnet *, u16);
221 static void     igb_setup_vlan_hw_support(struct adapter *);
222
223 static int      igb_xmit(struct tx_ring *, struct mbuf **);
224 static int      igb_dma_malloc(struct adapter *, bus_size_t,
225                     struct igb_dma_alloc *, int);
226 static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
227 static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
228 static void     igb_print_nvm_info(struct adapter *);
229 static int      igb_is_valid_ether_addr(u8 *);
230 static void     igb_add_hw_stats(struct adapter *);
231
232 static void     igb_vf_init_stats(struct adapter *);
233 static void     igb_update_vf_stats_counters(struct adapter *);
234
235 /* Management and WOL Support */
236 static void     igb_init_manageability(struct adapter *);
237 static void     igb_release_manageability(struct adapter *);
238 static void     igb_get_hw_control(struct adapter *);
239 static void     igb_release_hw_control(struct adapter *);
240 static void     igb_enable_wakeup(device_t);
241 static void     igb_led_func(void *, int);
242
243 static int      igb_irq_fast(void *);
244 static void     igb_add_rx_process_limit(struct adapter *, const char *,
245                     const char *, int *, int);
246 static void     igb_handle_que(void *context, int pending);
247 static void     igb_handle_link(void *context, int pending);
248
249 /* These are MSIX-only irq handlers */
250 static void     igb_msix_que(void *);
251 static void     igb_msix_link(void *);
252
253 #ifdef DEVICE_POLLING
254 static poll_handler_t igb_poll;
255 #endif /* DEVICE_POLLING */
256
257 /*********************************************************************
258  *  FreeBSD Device Interface Entry Points
259  *********************************************************************/
260
261 static device_method_t igb_methods[] = {
262         /* Device interface */
263         DEVMETHOD(device_probe, igb_probe),
264         DEVMETHOD(device_attach, igb_attach),
265         DEVMETHOD(device_detach, igb_detach),
266         DEVMETHOD(device_shutdown, igb_shutdown),
267         DEVMETHOD(device_suspend, igb_suspend),
268         DEVMETHOD(device_resume, igb_resume),
269         {0, 0}
270 };
271
272 static driver_t igb_driver = {
273         "igb", igb_methods, sizeof(struct adapter),
274 };
275
276 static devclass_t igb_devclass;
277 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
278 MODULE_DEPEND(igb, pci, 1, 1, 1);
279 MODULE_DEPEND(igb, ether, 1, 1, 1);
280
281 /*********************************************************************
282  *  Tunable default values.
283  *********************************************************************/
284
285 /* Descriptor defaults */
286 static int igb_rxd = IGB_DEFAULT_RXD;
287 static int igb_txd = IGB_DEFAULT_TXD;
288 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
289 TUNABLE_INT("hw.igb.txd", &igb_txd);
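/*
 * Illustrative usage (not part of the driver): these are boot-time
 * tunables, typically set in /boot/loader.conf, e.g. with the
 * hypothetical values below.  A value must pass the range and
 * IGB_DBA_ALIGN checks in igb_attach() or the default is used.
 *
 *      hw.igb.rxd="4096"
 *      hw.igb.txd="4096"
 */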
290
291 /*
292 ** AIM: Adaptive Interrupt Moderation,
293 ** which means that the interrupt rate
294 ** is varied over time based on the
295 ** traffic for that interrupt vector.
296 */
297 static int igb_enable_aim = TRUE;
298 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
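/*
 * Illustrative: AIM can be disabled at boot ("hw.igb.enable_aim=0"
 * in loader.conf) or at runtime through the per-device sysctl that
 * igb_attach() registers, e.g. "sysctl dev.igb.0.enable_aim=0"
 * (unit 0 assumed).
 */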
299
300 /*
301  * MSIX should be the default for best performance,
302  * but this allows it to be forced off for testing.
303  */         
304 static int igb_enable_msix = 1;
305 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
306
307 /*
308  * Header split has seemed beneficial in many
309  * of the circumstances tested; however, there
310  * have been some stability issues, so the
311  * default is off.
312  */
313 static int igb_header_split = FALSE;
314 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
315
316 /*
317 ** This will autoconfigure based on
318 ** the number of CPUs if left at 0.
319 */
320 static int igb_num_queues = 0;
321 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
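/*
 * Illustrative: "hw.igb.num_queues=2" in loader.conf would pin the
 * driver to two queues; at the default of 0 the queue count is sized
 * from the CPU count, subject to what MSIX setup can allocate.
 */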
322
323 /* How many packets rxeof tries to clean at a time */
324 static int igb_rx_process_limit = 100;
325 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
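/*
 * Illustrative: igb_attach() also exposes this as the per-device
 * sysctl "rx_processing_limit", so the budget can be raised at
 * runtime, e.g. "sysctl dev.igb.0.rx_processing_limit=200"
 * (unit 0 assumed).
 */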
326
327 /* Flow control setting - default to FULL */
328 static int igb_fc_setting = e1000_fc_full;
329 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
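/*
 * Illustrative: the value is an e1000_fc_mode from the shared code,
 * where none/rx_pause/tx_pause/full are conventionally 0/1/2/3, so
 * "hw.igb.fc_setting=0" in loader.conf would disable flow control.
 */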
330
331 /*
332 ** Shadow VFTA table; this is needed because
333 ** the real filter table gets cleared during
334 ** a soft reset and the driver needs to be able
335 ** to repopulate it.
336 */
337 static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
338
339
340 /*********************************************************************
341  *  Device identification routine
342  *
343  *  igb_probe determines whether the driver should be loaded
344  *  for an adapter, based on its PCI vendor/device ID.
345  *
346  *  return BUS_PROBE_DEFAULT on success, positive on failure
347  *********************************************************************/
348
349 static int
350 igb_probe(device_t dev)
351 {
352         char            adapter_name[60];
353         uint16_t        pci_vendor_id = 0;
354         uint16_t        pci_device_id = 0;
355         uint16_t        pci_subvendor_id = 0;
356         uint16_t        pci_subdevice_id = 0;
357         igb_vendor_info_t *ent;
358
359         INIT_DEBUGOUT("igb_probe: begin");
360
361         pci_vendor_id = pci_get_vendor(dev);
362         if (pci_vendor_id != IGB_VENDOR_ID)
363                 return (ENXIO);
364
365         pci_device_id = pci_get_device(dev);
366         pci_subvendor_id = pci_get_subvendor(dev);
367         pci_subdevice_id = pci_get_subdevice(dev);
368
369         ent = igb_vendor_info_array;
370         while (ent->vendor_id != 0) {
371                 if ((pci_vendor_id == ent->vendor_id) &&
372                     (pci_device_id == ent->device_id) &&
373
374                     ((pci_subvendor_id == ent->subvendor_id) ||
375                     (ent->subvendor_id == PCI_ANY_ID)) &&
376
377                     ((pci_subdevice_id == ent->subdevice_id) ||
378                     (ent->subdevice_id == PCI_ANY_ID))) {
379                         sprintf(adapter_name, "%s %s",
380                                 igb_strings[ent->index],
381                                 igb_driver_version);
382                         device_set_desc_copy(dev, adapter_name);
383                         return (BUS_PROBE_DEFAULT);
384                 }
385                 ent++;
386         }
387
388         return (ENXIO);
389 }
390
391 /*********************************************************************
392  *  Device initialization routine
393  *
394  *  The attach entry point is called when the driver is being loaded.
395  *  This routine identifies the type of hardware, allocates all resources
396  *  and initializes the hardware.
397  *
398  *  return 0 on success, positive on failure
399  *********************************************************************/
400
401 static int
402 igb_attach(device_t dev)
403 {
404         struct adapter  *adapter;
405         int             error = 0;
406         u16             eeprom_data;
407
408         INIT_DEBUGOUT("igb_attach: begin");
409
410         adapter = device_get_softc(dev);
411         adapter->dev = adapter->osdep.dev = dev;
412         IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
413
414         /* SYSCTL stuff */
415         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
416             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
417             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
418             igb_sysctl_nvm_info, "I", "NVM Information");
419
420         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
421             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
422             OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
423             &igb_fc_setting, 0, "Flow Control");
424
425         SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
426             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
427             OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
428             &igb_enable_aim, 1, "Interrupt Moderation");
429
430         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
431
432         /* Determine hardware and mac info */
433         igb_identify_hardware(adapter);
434
435         /* Setup PCI resources */
436         if (igb_allocate_pci_resources(adapter)) {
437                 device_printf(dev, "Allocation of PCI resources failed\n");
438                 error = ENXIO;
439                 goto err_pci;
440         }
441
442         /* Do Shared Code initialization */
443         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
444                 device_printf(dev, "Setup of Shared code failed\n");
445                 error = ENXIO;
446                 goto err_pci;
447         }
448
449         e1000_get_bus_info(&adapter->hw);
450
451         /* Sysctls for limiting the amount of work done in the taskqueue */
452         igb_add_rx_process_limit(adapter, "rx_processing_limit",
453             "max number of rx packets to process", &adapter->rx_process_limit,
454             igb_rx_process_limit);
455
456         /*
457          * Validate the number of transmit and receive descriptors.
458          * It must not exceed the hardware maximum, and the ring size
459          * in bytes must be a multiple of IGB_DBA_ALIGN.
460          */
461         if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
462             (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
463                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
464                     IGB_DEFAULT_TXD, igb_txd);
465                 adapter->num_tx_desc = IGB_DEFAULT_TXD;
466         } else
467                 adapter->num_tx_desc = igb_txd;
468         if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
469             (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
470                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
471                     IGB_DEFAULT_RXD, igb_rxd);
472                 adapter->num_rx_desc = IGB_DEFAULT_RXD;
473         } else
474                 adapter->num_rx_desc = igb_rxd;
475
476         adapter->hw.mac.autoneg = DO_AUTO_NEG;
477         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
478         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
479
480         /* Copper options */
481         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
482                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
483                 adapter->hw.phy.disable_polarity_correction = FALSE;
484                 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
485         }
486
487         /*
488          * Set the frame limits assuming
489          * standard ethernet sized frames.
490          */
491         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
492         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
493
494         /*
495         ** Allocate and Setup Queues
496         */
497         if (igb_allocate_queues(adapter)) {
498                 error = ENOMEM;
499                 goto err_pci;
500         }
501
502         /* Allocate the appropriate stats memory */
503         if (adapter->hw.mac.type == e1000_vfadapt) {
504                 adapter->stats =
505                     (struct e1000_vf_stats *)malloc(sizeof \
506                     (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
507                 igb_vf_init_stats(adapter);
508         } else
509                 adapter->stats =
510                     (struct e1000_hw_stats *)malloc(sizeof \
511                     (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
512
513         /*
514         ** Start from a known state; this is
515         ** important when reading the NVM and
516         ** MAC address from it.
517         */
518         e1000_reset_hw(&adapter->hw);
519
520         /* Make sure we have a good EEPROM before we read from it */
521         if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
522                 /*
523                 ** Some PCI-E parts fail the first check due to
524                 ** the link being in sleep state, call it again,
525                 ** if it fails a second time its a real issue.
526                 */
527                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
528                         device_printf(dev,
529                             "The EEPROM Checksum Is Not Valid\n");
530                         error = EIO;
531                         goto err_late;
532                 }
533         }
534
535         /*
536         ** Copy the permanent MAC address out of the EEPROM
537         */
538         if (e1000_read_mac_addr(&adapter->hw) < 0) {
539                 device_printf(dev, "EEPROM read error while reading MAC"
540                     " address\n");
541                 error = EIO;
542                 goto err_late;
543         }
544         /* Check its sanity */
545         if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
546                 device_printf(dev, "Invalid MAC address\n");
547                 error = EIO;
548                 goto err_late;
549         }
550
551         /* 
552         ** Configure Interrupts
553         */
554         if ((adapter->msix > 1) && (igb_enable_msix))
555                 error = igb_allocate_msix(adapter);
556         else /* MSI or Legacy */
557                 error = igb_allocate_legacy(adapter);
558         if (error)
559                 goto err_late;
560
561         /* Setup OS specific network interface */
562         igb_setup_interface(dev, adapter);
563
564         /* Now get a good starting state */
565         igb_reset(adapter);
566
567         /* Initialize statistics */
568         igb_update_stats_counters(adapter);
569
570         adapter->hw.mac.get_link_status = 1;
571         igb_update_link_status(adapter);
572
573         /* Indicate SOL/IDER usage */
574         if (e1000_check_reset_block(&adapter->hw))
575                 device_printf(dev,
576                     "PHY reset is blocked due to SOL/IDER session.\n");
577
578         /* Determine if we have to control management hardware */
579         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
580
581         /*
582          * Setup Wake-on-Lan
583          */
584         /* APME bit in EEPROM is mapped to WUC.APME */
585         eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
586         if (eeprom_data)
587                 adapter->wol = E1000_WUFC_MAG;
588
589         /* Register for VLAN events */
590         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
591              igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
592         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
593              igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
594
595         igb_add_hw_stats(adapter);
596
597         /* Tell the stack that the interface is not active */
598         adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
599
600         adapter->led_dev = led_create(igb_led_func, adapter,
601             device_get_nameunit(dev));
602
603         INIT_DEBUGOUT("igb_attach: end");
604
605         return (0);
606
607 err_late:
608         igb_free_transmit_structures(adapter);
609         igb_free_receive_structures(adapter);
610         igb_release_hw_control(adapter);
611 err_pci:
612         igb_free_pci_resources(adapter);
613         IGB_CORE_LOCK_DESTROY(adapter);
614
615         return (error);
616 }
617
618 /*********************************************************************
619  *  Device removal routine
620  *
621  *  The detach entry point is called when the driver is being removed.
622  *  This routine stops the adapter and deallocates all the resources
623  *  that were allocated for driver operation.
624  *
625  *  return 0 on success, positive on failure
626  *********************************************************************/
627
628 static int
629 igb_detach(device_t dev)
630 {
631         struct adapter  *adapter = device_get_softc(dev);
632         struct ifnet    *ifp = adapter->ifp;
633
634         INIT_DEBUGOUT("igb_detach: begin");
635
636         /* Make sure VLANS are not using driver */
637         if (adapter->ifp->if_vlantrunk != NULL) {
638                 device_printf(dev,"Vlan in use, detach first\n");
639                 return (EBUSY);
640         }
641
642         if (adapter->led_dev != NULL)
643                 led_destroy(adapter->led_dev);
644
645 #ifdef DEVICE_POLLING
646         if (ifp->if_capenable & IFCAP_POLLING)
647                 ether_poll_deregister(ifp);
648 #endif
649
650         IGB_CORE_LOCK(adapter);
651         adapter->in_detach = 1;
652         igb_stop(adapter);
653         IGB_CORE_UNLOCK(adapter);
654
655         e1000_phy_hw_reset(&adapter->hw);
656
657         /* Give control back to firmware */
658         igb_release_manageability(adapter);
659         igb_release_hw_control(adapter);
660
661         if (adapter->wol) {
662                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
663                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
664                 igb_enable_wakeup(dev);
665         }
666
667         /* Unregister VLAN events */
668         if (adapter->vlan_attach != NULL)
669                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
670         if (adapter->vlan_detach != NULL)
671                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
672
673         ether_ifdetach(adapter->ifp);
674
675         callout_drain(&adapter->timer);
676
677         igb_free_pci_resources(adapter);
678         bus_generic_detach(dev);
679         if_free(ifp);
680
681         igb_free_transmit_structures(adapter);
682         igb_free_receive_structures(adapter);
683
684         IGB_CORE_LOCK_DESTROY(adapter);
685
686         return (0);
687 }
688
689 /*********************************************************************
690  *
691  *  Shutdown entry point
692  *
693  **********************************************************************/
694
695 static int
696 igb_shutdown(device_t dev)
697 {
698         return igb_suspend(dev);
699 }
700
701 /*
702  * Suspend/resume device methods.
703  */
704 static int
705 igb_suspend(device_t dev)
706 {
707         struct adapter *adapter = device_get_softc(dev);
708
709         IGB_CORE_LOCK(adapter);
710
711         igb_stop(adapter);
712
713         igb_release_manageability(adapter);
714         igb_release_hw_control(adapter);
715
716         if (adapter->wol) {
717                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
718                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
719                 igb_enable_wakeup(dev);
720         }
721
722         IGB_CORE_UNLOCK(adapter);
723
724         return bus_generic_suspend(dev);
725 }
726
727 static int
728 igb_resume(device_t dev)
729 {
730         struct adapter *adapter = device_get_softc(dev);
731         struct ifnet *ifp = adapter->ifp;
732
733         IGB_CORE_LOCK(adapter);
734         igb_init_locked(adapter);
735         igb_init_manageability(adapter);
736
737         if ((ifp->if_flags & IFF_UP) &&
738             (ifp->if_drv_flags & IFF_DRV_RUNNING))
739                 igb_start(ifp);
740
741         IGB_CORE_UNLOCK(adapter);
742
743         return bus_generic_resume(dev);
744 }
745
746
747 /*********************************************************************
748  *  Transmit entry point
749  *
750  *  igb_start is called by the stack to initiate a transmit.
751  *  The driver will remain in this routine as long as there are
752  *  packets to transmit and transmit resources are available.
753  *  In case resources are not available stack is notified and
754  *  the packet is requeued.
755  **********************************************************************/
756
757 static void
758 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
759 {
760         struct adapter  *adapter = ifp->if_softc;
761         struct mbuf     *m_head;
762
763         IGB_TX_LOCK_ASSERT(txr);
764
765         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
766             IFF_DRV_RUNNING)
767                 return;
768         if (!adapter->link_active)
769                 return;
770
771         /* Call cleanup if number of TX descriptors low */
772         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
773                 igb_txeof(txr);
774
775         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
776                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
777                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
778                         break;
779                 }
780                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
781                 if (m_head == NULL)
782                         break;
783                 /*
784                  *  Encapsulation can modify our pointer, and/or make it
785                  *  NULL on failure.  In that event, we can't requeue.
786                  */
787                 if (igb_xmit(txr, &m_head)) {
788                         if (m_head == NULL)
789                                 break;
790                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
791                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
792                         break;
793                 }
794
795                 /* Send a copy of the frame to the BPF listener */
796                 ETHER_BPF_MTAP(ifp, m_head);
797
798                 /* Set watchdog on */
799                 txr->watchdog_time = ticks;
800                 txr->watchdog_check = TRUE;
801         }
802 }
803  
804 /*
805  * Legacy TX driver routine, called from the
806  * stack, always uses tx[0], and spins for it.
807  * Should not be used with multiqueue tx
808  */
809 static void
810 igb_start(struct ifnet *ifp)
811 {
812         struct adapter  *adapter = ifp->if_softc;
813         struct tx_ring  *txr = adapter->tx_rings;
814
815         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
816                 IGB_TX_LOCK(txr);
817                 igb_start_locked(txr, ifp);
818                 IGB_TX_UNLOCK(txr);
819         }
820         return;
821 }
822
823 #if __FreeBSD_version >= 800000
824 /*
825 ** Multiqueue Transmit driver
826 **
827 */
828 static int
829 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
830 {
831         struct adapter          *adapter = ifp->if_softc;
832         struct igb_queue        *que;
833         struct tx_ring          *txr;
834         int                     i = 0, err = 0;
835
836         /* Which queue to use */
837         if ((m->m_flags & M_FLOWID) != 0)
838                 i = m->m_pkthdr.flowid % adapter->num_queues;
839
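        /*
         * Illustrative: with num_queues = 4, a flow whose hash is 10
         * maps to ring 10 % 4 = 2, keeping each flow's packets ordered
         * on a single tx ring.
         */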
840         txr = &adapter->tx_rings[i];
841         que = &adapter->queues[i];
842
843         if (IGB_TX_TRYLOCK(txr)) {
844                 err = igb_mq_start_locked(ifp, txr, m);
845                 IGB_TX_UNLOCK(txr);
846         } else {
847                 err = drbr_enqueue(ifp, txr->br, m);
848                 taskqueue_enqueue(que->tq, &que->que_task);
849         }
850
851         return (err);
852 }
853
854 static int
855 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
856 {
857         struct adapter  *adapter = txr->adapter;
858         struct mbuf     *next;
859         int             err = 0, enq;
860
861         IGB_TX_LOCK_ASSERT(txr);
862
863         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
864             IFF_DRV_RUNNING || adapter->link_active == 0) {
865                 if (m != NULL)
866                         err = drbr_enqueue(ifp, txr->br, m);
867                 return (err);
868         }
869
870         /* Call cleanup if number of TX descriptors low */
871         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
872                 igb_txeof(txr);
873
874         enq = 0;
875         if (m == NULL) {
876                 next = drbr_dequeue(ifp, txr->br);
877         } else if (drbr_needs_enqueue(ifp, txr->br)) {
878                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
879                         return (err);
880                 next = drbr_dequeue(ifp, txr->br);
881         } else
882                 next = m;
883
884         /* Process the queue */
885         while (next != NULL) {
886                 if ((err = igb_xmit(txr, &next)) != 0) {
887                         if (next != NULL)
888                                 err = drbr_enqueue(ifp, txr->br, next);
889                         break;
890                 }
891                 enq++;
892                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
893                 ETHER_BPF_MTAP(ifp, next);
894                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
895                         break;
896                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
897                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
898                         break;
899                 }
900                 next = drbr_dequeue(ifp, txr->br);
901         }
902         if (enq > 0) {
903                 /* Set the watchdog */
904                 txr->watchdog_check = TRUE;
905                 txr->watchdog_time = ticks;
906         }
907         return (err);
908 }
909
910 /*
911 ** Flush all ring buffers
912 */
913 static void
914 igb_qflush(struct ifnet *ifp)
915 {
916         struct adapter  *adapter = ifp->if_softc;
917         struct tx_ring  *txr = adapter->tx_rings;
918         struct mbuf     *m;
919
920         for (int i = 0; i < adapter->num_queues; i++, txr++) {
921                 IGB_TX_LOCK(txr);
922                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
923                         m_freem(m);
924                 IGB_TX_UNLOCK(txr);
925         }
926         if_qflush(ifp);
927 }
928 #endif /* __FreeBSD_version >= 800000 */
929
930 /*********************************************************************
931  *  Ioctl entry point
932  *
933  *  igb_ioctl is called when the user wants to configure the
934  *  interface.
935  *
936  *  return 0 on success, positive on failure
937  **********************************************************************/
938
939 static int
940 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
941 {
942         struct adapter  *adapter = ifp->if_softc;
943         struct ifreq *ifr = (struct ifreq *)data;
944 #ifdef INET
945         struct ifaddr *ifa = (struct ifaddr *)data;
946 #endif
947         int error = 0;
948
949         if (adapter->in_detach)
950                 return (error);
951
952         switch (command) {
953         case SIOCSIFADDR:
954 #ifdef INET
955                 if (ifa->ifa_addr->sa_family == AF_INET) {
956                         /*
957                          * XXX
958                          * Since resetting hardware takes a very long time
959                          * and results in link renegotiation, we
960                          * initialize the hardware only when it is absolutely
961                          * required.
962                          */
963                         ifp->if_flags |= IFF_UP;
964                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
965                                 IGB_CORE_LOCK(adapter);
966                                 igb_init_locked(adapter);
967                                 IGB_CORE_UNLOCK(adapter);
968                         }
969                         if (!(ifp->if_flags & IFF_NOARP))
970                                 arp_ifinit(ifp, ifa);
971                 } else
972 #endif
973                         error = ether_ioctl(ifp, command, data);
974                 break;
975         case SIOCSIFMTU:
976             {
977                 int max_frame_size;
978
979                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
980
981                 IGB_CORE_LOCK(adapter);
982                 max_frame_size = 9234;
983                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
984                     ETHER_CRC_LEN) {
985                         IGB_CORE_UNLOCK(adapter);
986                         error = EINVAL;
987                         break;
988                 }
989
990                 ifp->if_mtu = ifr->ifr_mtu;
991                 adapter->max_frame_size =
992                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
993                 igb_init_locked(adapter);
994                 IGB_CORE_UNLOCK(adapter);
995                 break;
996             }
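                /*
                 * Note: with max_frame_size capped at 9234 bytes, the
                 * largest MTU accepted above is 9234 - ETHER_HDR_LEN (14)
                 * - ETHER_CRC_LEN (4) = 9216, e.g. "ifconfig igb0 mtu 9216".
                 */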
997         case SIOCSIFFLAGS:
998                 IOCTL_DEBUGOUT("ioctl rcv'd:\
999                     SIOCSIFFLAGS (Set Interface Flags)");
1000                 IGB_CORE_LOCK(adapter);
1001                 if (ifp->if_flags & IFF_UP) {
1002                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1003                                 if ((ifp->if_flags ^ adapter->if_flags) &
1004                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1005                                         igb_disable_promisc(adapter);
1006                                         igb_set_promisc(adapter);
1007                                 }
1008                         } else
1009                                 igb_init_locked(adapter);
1010                 } else
1011                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1012                                 igb_stop(adapter);
1013                 adapter->if_flags = ifp->if_flags;
1014                 IGB_CORE_UNLOCK(adapter);
1015                 break;
1016         case SIOCADDMULTI:
1017         case SIOCDELMULTI:
1018                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1019                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1020                         IGB_CORE_LOCK(adapter);
1021                         igb_disable_intr(adapter);
1022                         igb_set_multi(adapter);
1023 #ifdef DEVICE_POLLING
1024                         if (!(ifp->if_capenable & IFCAP_POLLING))
1025 #endif
1026                                 igb_enable_intr(adapter);
1027                         IGB_CORE_UNLOCK(adapter);
1028                 }
1029                 break;
1030         case SIOCSIFMEDIA:
1031                 /* Check SOL/IDER usage */
1032                 IGB_CORE_LOCK(adapter);
1033                 if (e1000_check_reset_block(&adapter->hw)) {
1034                         IGB_CORE_UNLOCK(adapter);
1035                         device_printf(adapter->dev, "Media change is"
1036                             " blocked due to SOL/IDER session.\n");
1037                         break;
1038                 }
1039                 IGB_CORE_UNLOCK(adapter);
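                /* FALLTHROUGH to the shared get/set media handling */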
1040         case SIOCGIFMEDIA:
1041                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1042                     SIOCxIFMEDIA (Get/Set Interface Media)");
1043                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1044                 break;
1045         case SIOCSIFCAP:
1046             {
1047                 int mask, reinit;
1048
1049                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1050                 reinit = 0;
1051                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1052 #ifdef DEVICE_POLLING
1053                 if (mask & IFCAP_POLLING) {
1054                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1055                                 error = ether_poll_register(igb_poll, ifp);
1056                                 if (error)
1057                                         return (error);
1058                                 IGB_CORE_LOCK(adapter);
1059                                 igb_disable_intr(adapter);
1060                                 ifp->if_capenable |= IFCAP_POLLING;
1061                                 IGB_CORE_UNLOCK(adapter);
1062                         } else {
1063                                 error = ether_poll_deregister(ifp);
1064                                 /* Enable interrupt even in error case */
1065                                 IGB_CORE_LOCK(adapter);
1066                                 igb_enable_intr(adapter);
1067                                 ifp->if_capenable &= ~IFCAP_POLLING;
1068                                 IGB_CORE_UNLOCK(adapter);
1069                         }
1070                 }
1071 #endif
1072                 if (mask & IFCAP_HWCSUM) {
1073                         ifp->if_capenable ^= IFCAP_HWCSUM;
1074                         reinit = 1;
1075                 }
1076                 if (mask & IFCAP_TSO4) {
1077                         ifp->if_capenable ^= IFCAP_TSO4;
1078                         reinit = 1;
1079                 }
1080                 if (mask & IFCAP_VLAN_HWTAGGING) {
1081                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1082                         reinit = 1;
1083                 }
1084                 if (mask & IFCAP_VLAN_HWFILTER) {
1085                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1086                         reinit = 1;
1087                 }
1088                 if (mask & IFCAP_LRO) {
1089                         ifp->if_capenable ^= IFCAP_LRO;
1090                         reinit = 1;
1091                 }
1092                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1093                         igb_init(adapter);
1094                 VLAN_CAPABILITIES(ifp);
1095                 break;
1096             }
1097
1098         default:
1099                 error = ether_ioctl(ifp, command, data);
1100                 break;
1101         }
1102
1103         return (error);
1104 }
1105
1106
1107 /*********************************************************************
1108  *  Init entry point
1109  *
1110  *  This routine is used in two ways. It is used by the stack as
1111  *  the init entry point in the network interface structure. It is
1112  *  also used by the driver as a hw/sw initialization routine to
1113  *  get to a consistent state.
1114  *
1115  *  This function returns void; errors are reported with device_printf.
1116  **********************************************************************/
1117
1118 static void
1119 igb_init_locked(struct adapter *adapter)
1120 {
1121         struct ifnet    *ifp = adapter->ifp;
1122         device_t        dev = adapter->dev;
1123
1124         INIT_DEBUGOUT("igb_init: begin");
1125
1126         IGB_CORE_LOCK_ASSERT(adapter);
1127
1128         igb_disable_intr(adapter);
1129         callout_stop(&adapter->timer);
1130
1131         /* Get the latest MAC address; the user may have set a LAA */
1132         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1133               ETHER_ADDR_LEN);
1134
1135         /* Put the address into the Receive Address Array */
1136         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1137
1138         igb_reset(adapter);
1139         igb_update_link_status(adapter);
1140
1141         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1142
1143         /* Use real VLAN Filter support? */
1144         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1145                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1146                         /* Use real VLAN Filter support */
1147                         igb_setup_vlan_hw_support(adapter);
1148                 else {
1149                         u32 ctrl;
1150                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1151                         ctrl |= E1000_CTRL_VME;
1152                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1153                 }
1154         }
1155                                 
1156         /* Set hardware offload abilities */
1157         ifp->if_hwassist = 0;
1158         if (ifp->if_capenable & IFCAP_TXCSUM) {
1159                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1160 #if __FreeBSD_version >= 800000
1161                 if (adapter->hw.mac.type == e1000_82576)
1162                         ifp->if_hwassist |= CSUM_SCTP;
1163 #endif
1164         }
1165
1166         if (ifp->if_capenable & IFCAP_TSO4)
1167                 ifp->if_hwassist |= CSUM_TSO;
1168
1169         /* Configure for OS presence */
1170         igb_init_manageability(adapter);
1171
1172         /* Prepare transmit descriptors and buffers */
1173         igb_setup_transmit_structures(adapter);
1174         igb_initialize_transmit_units(adapter);
1175
1176         /* Setup Multicast table */
1177         igb_set_multi(adapter);
1178
1179         /*
1180         ** Figure out the desired mbuf pool
1181         ** for doing jumbo/packetsplit
1182         */
1183         if (ifp->if_mtu > ETHERMTU)
1184                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1185         else
1186                 adapter->rx_mbuf_sz = MCLBYTES;
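        /*
         * Illustrative: a jumbo MTU of 9000 selects MJUMPAGESIZE
         * (PAGE_SIZE, typically 4096-byte) clusters, while the standard
         * 1500-byte MTU uses 2k MCLBYTES clusters.
         */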
1187
1188         /* Prepare receive descriptors and buffers */
1189         if (igb_setup_receive_structures(adapter)) {
1190                 device_printf(dev, "Could not setup receive structures\n");
1191                 return;
1192         }
1193         igb_initialize_receive_units(adapter);
1194
1195         /* Don't lose promiscuous settings */
1196         igb_set_promisc(adapter);
1197
1198         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1199         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1200
1201         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1202         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1203
1204         if (adapter->msix > 1) /* Set up queue routing */
1205                 igb_configure_queues(adapter);
1206
1207         /* Set up VLAN tag offload and filter */
1208         igb_setup_vlan_hw_support(adapter);
1209
1210         /* this clears any pending interrupts */
1211         E1000_READ_REG(&adapter->hw, E1000_ICR);
1212 #ifdef DEVICE_POLLING
1213         /*
1214          * Only enable interrupts if we are not polling; make sure
1215          * they are off otherwise.
1216          */
1217         if (ifp->if_capenable & IFCAP_POLLING)
1218                 igb_disable_intr(adapter);
1219         else
1220 #endif /* DEVICE_POLLING */
1221         {
1222         igb_enable_intr(adapter);
1223         E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1224         }
1225
1226         /* Don't reset the phy next time init gets called */
1227         adapter->hw.phy.reset_disable = TRUE;
1228 }
1229
1230 static void
1231 igb_init(void *arg)
1232 {
1233         struct adapter *adapter = arg;
1234
1235         IGB_CORE_LOCK(adapter);
1236         igb_init_locked(adapter);
1237         IGB_CORE_UNLOCK(adapter);
1238 }
1239
1240
1241 static void
1242 igb_handle_que(void *context, int pending)
1243 {
1244         struct igb_queue *que = context;
1245         struct adapter *adapter = que->adapter;
1246         struct tx_ring *txr = que->txr;
1247         struct ifnet    *ifp = adapter->ifp;
1248
1249         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1250                 bool    more;
1251
1252                 more = igb_rxeof(que, -1, NULL);
1253
1254                 IGB_TX_LOCK(txr);
1255                 if (igb_txeof(txr))
1256                         more = TRUE;
1257 #if __FreeBSD_version >= 800000
1258                 if (!drbr_empty(ifp, txr->br))
1259                         igb_mq_start_locked(ifp, txr, NULL);
1260 #else
1261                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1262                         igb_start_locked(txr, ifp);
1263 #endif
1264                 IGB_TX_UNLOCK(txr);
1265                 if (more) {
1266                         taskqueue_enqueue(que->tq, &que->que_task);
1267                         return;
1268                 }
1269         }
1270
1271 #ifdef DEVICE_POLLING
1272         if (ifp->if_capenable & IFCAP_POLLING)
1273                 return;
1274 #endif
1275         /* Reenable this interrupt */
1276         if (que->eims)
1277                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1278         else
1279                 igb_enable_intr(adapter);
1280 }
1281
1282 /* Deal with link in a sleepable context */
1283 static void
1284 igb_handle_link(void *context, int pending)
1285 {
1286         struct adapter *adapter = context;
1287
1288         adapter->hw.mac.get_link_status = 1;
1289         igb_update_link_status(adapter);
1290 }
1291
1292 /*********************************************************************
1293  *
1294  *  MSI/Legacy Deferred
1295  *  Interrupt Service routine  
1296  *
1297  *********************************************************************/
1298 static int
1299 igb_irq_fast(void *arg)
1300 {
1301         struct adapter          *adapter = arg;
1302         struct igb_queue        *que = adapter->queues;
1303         u32                     reg_icr;
1304
1305
1306         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1307
1308         /* Hot eject?  */
1309         if (reg_icr == 0xffffffff)
1310                 return FILTER_STRAY;
1311
1312         /* Definitely not our interrupt.  */
1313         if (reg_icr == 0x0)
1314                 return FILTER_STRAY;
1315
1316         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1317                 return FILTER_STRAY;
1318
1319         /*
1320          * Mask interrupts until the taskqueue is finished running.  This is
1321          * cheap, just assume that it is needed.  This also works around the
1322          * MSI message reordering errata on certain systems.
1323          */
1324         igb_disable_intr(adapter);
1325         taskqueue_enqueue(que->tq, &que->que_task);
1326
1327         /* Link status change */
1328         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1329                 taskqueue_enqueue(que->tq, &adapter->link_task);
1330
1331         if (reg_icr & E1000_ICR_RXO)
1332                 adapter->rx_overruns++;
1333         return FILTER_HANDLED;
1334 }
1335
1336 #ifdef DEVICE_POLLING
1337 /*********************************************************************
1338  *
1339  *  Legacy polling routine: if using this code you MUST be sure that
1340  *  multiqueue is not defined, i.e., set igb_num_queues to 1.
1341  *
1342  *********************************************************************/
1343 #if __FreeBSD_version >= 800000
1344 #define POLL_RETURN_COUNT(a) (a)
1345 static int
1346 #else
1347 #define POLL_RETURN_COUNT(a)
1348 static void
1349 #endif
1350 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1351 {
1352         struct adapter          *adapter = ifp->if_softc;
1353         struct igb_queue        *que = adapter->queues;
1354         struct tx_ring          *txr = adapter->tx_rings;
1355         u32                     reg_icr, rx_done = 0;
1356         u32                     loop = IGB_MAX_LOOP;
1357         bool                    more;
1358
1359         IGB_CORE_LOCK(adapter);
1360         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1361                 IGB_CORE_UNLOCK(adapter);
1362                 return POLL_RETURN_COUNT(rx_done);
1363         }
1364
1365         if (cmd == POLL_AND_CHECK_STATUS) {
1366                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1367                 /* Link status change */
1368                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1369                         igb_handle_link(adapter, 0);
1370
1371                 if (reg_icr & E1000_ICR_RXO)
1372                         adapter->rx_overruns++;
1373         }
1374         IGB_CORE_UNLOCK(adapter);
1375
1376         igb_rxeof(que, count, &rx_done);
1377
1378         IGB_TX_LOCK(txr);
1379         do {
1380                 more = igb_txeof(txr);
1381         } while (loop-- && more);
1382 #if __FreeBSD_version >= 800000
1383         if (!drbr_empty(ifp, txr->br))
1384                 igb_mq_start_locked(ifp, txr, NULL);
1385 #else
1386         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1387                 igb_start_locked(txr, ifp);
1388 #endif
1389         IGB_TX_UNLOCK(txr);
1390         return POLL_RETURN_COUNT(rx_done);
1391 }
1392 #endif /* DEVICE_POLLING */
1393
1394 /*********************************************************************
1395  *
1396  *  MSIX Queue Interrupt Service routine
1397  *
1398  **********************************************************************/
1399 static void
1400 igb_msix_que(void *arg)
1401 {
1402         struct igb_queue *que = arg;
1403         struct adapter *adapter = que->adapter;
1404         struct tx_ring *txr = que->txr;
1405         struct rx_ring *rxr = que->rxr;
1406         u32             newitr = 0;
1407         bool            more_tx, more_rx;
1408
1409         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1410         ++que->irqs;
1411
1412         IGB_TX_LOCK(txr);
1413         more_tx = igb_txeof(txr);
1414         IGB_TX_UNLOCK(txr);
1415
1416         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1417
1418         if (igb_enable_aim == FALSE)
1419                 goto no_calc;
1420         /*
1421         ** Do Adaptive Interrupt Moderation:
1422         **  - Write out last calculated setting
1423         **  - Calculate based on average size over
1424         **    the last interval.
1425         */
1426         if (que->eitr_setting)
1427                 E1000_WRITE_REG(&adapter->hw,
1428                     E1000_EITR(que->msix), que->eitr_setting);
1429  
1430         que->eitr_setting = 0;
1431
1432         /* Idle, do nothing */
1433         if ((txr->bytes == 0) && (rxr->bytes == 0))
1434                 goto no_calc;
1435                                 
1436         /* Use half the default if link is sub-gigabit */
1437         if (adapter->link_speed != 1000)
1438                 newitr = IGB_DEFAULT_ITR / 2;
1439         else {
1440                 if ((txr->bytes) && (txr->packets))
1441                         newitr = txr->bytes/txr->packets;
1442                 if ((rxr->bytes) && (rxr->packets))
1443                         newitr = max(newitr,
1444                             (rxr->bytes / rxr->packets));
1445                 newitr += 24; /* account for hardware frame, crc */
1446                 /* set an upper boundary */
1447                 newitr = min(newitr, 3000);
1448                 /* Be nice to the mid range */
1449                 if ((newitr > 300) && (newitr < 1200))
1450                         newitr = (newitr / 3);
1451                 else
1452                         newitr = (newitr / 2);
1453         }
1454         newitr &= 0x7FFC;  /* Mask invalid bits */
1455         if (adapter->hw.mac.type == e1000_82575)
1456                 newitr |= newitr << 16;
1457         else
1458                 newitr |= E1000_EITR_CNT_IGNR;
1459                  
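        /*
         * Worked example (illustrative): at 1Gb with an average frame
         * of ~1500 bytes, newitr = 1500 + 24 = 1524; that is above the
         * mid range, so it is halved to 762 and masked with 0x7FFC,
         * giving 760 as the EITR interval written on the next interrupt.
         */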
1460         /* save for next interrupt */
1461         que->eitr_setting = newitr;
1462
1463         /* Reset state */
1464         txr->bytes = 0;
1465         txr->packets = 0;
1466         rxr->bytes = 0;
1467         rxr->packets = 0;
1468
1469 no_calc:
1470         /* Schedule a clean task if needed */
1471         if (more_tx || more_rx) 
1472                 taskqueue_enqueue(que->tq, &que->que_task);
1473         else
1474                 /* Reenable this interrupt */
1475                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1476         return;
1477 }
1478
1479
1480 /*********************************************************************
1481  *
1482  *  MSIX Link Interrupt Service routine
1483  *
1484  **********************************************************************/
1485
1486 static void
1487 igb_msix_link(void *arg)
1488 {
1489         struct adapter  *adapter = arg;
1490         u32             icr;
1491
1492         ++adapter->link_irq;
1493         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1494         if (!(icr & E1000_ICR_LSC))
1495                 goto spurious;
1496         igb_handle_link(adapter, 0);
1497
1498 spurious:
1499         /* Rearm */
1500         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1501         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1502         return;
1503 }
1504
1505
1506 /*********************************************************************
1507  *
1508  *  Media Ioctl callback
1509  *
1510  *  This routine is called whenever the user queries the status of
1511  *  the interface using ifconfig.
1512  *
1513  **********************************************************************/
1514 static void
1515 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1516 {
1517         struct adapter *adapter = ifp->if_softc;
1518         u_char fiber_type = IFM_1000_SX;
1519
1520         INIT_DEBUGOUT("igb_media_status: begin");
1521
1522         IGB_CORE_LOCK(adapter);
1523         igb_update_link_status(adapter);
1524
1525         ifmr->ifm_status = IFM_AVALID;
1526         ifmr->ifm_active = IFM_ETHER;
1527
1528         if (!adapter->link_active) {
1529                 IGB_CORE_UNLOCK(adapter);
1530                 return;
1531         }
1532
1533         ifmr->ifm_status |= IFM_ACTIVE;
1534
1535         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1536             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1537                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1538         else {
1539                 switch (adapter->link_speed) {
1540                 case 10:
1541                         ifmr->ifm_active |= IFM_10_T;
1542                         break;
1543                 case 100:
1544                         ifmr->ifm_active |= IFM_100_TX;
1545                         break;
1546                 case 1000:
1547                         ifmr->ifm_active |= IFM_1000_T;
1548                         break;
1549                 }
1550                 if (adapter->link_duplex == FULL_DUPLEX)
1551                         ifmr->ifm_active |= IFM_FDX;
1552                 else
1553                         ifmr->ifm_active |= IFM_HDX;
1554         }
1555         IGB_CORE_UNLOCK(adapter);
1556 }
1557
1558 /*********************************************************************
1559  *
1560  *  Media Ioctl callback
1561  *
1562  *  This routine is called when the user changes speed/duplex using
1563  *  the media/mediaopt options with ifconfig.
1564  *
1565  **********************************************************************/
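/*
 * Example usage (illustrative):
 *
 *   ifconfig igb0 media 100baseTX mediaopt full-duplex   # force 100/FDX
 *   ifconfig igb0 media autoselect                       # back to autoneg
 */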
1566 static int
1567 igb_media_change(struct ifnet *ifp)
1568 {
1569         struct adapter *adapter = ifp->if_softc;
1570         struct ifmedia  *ifm = &adapter->media;
1571
1572         INIT_DEBUGOUT("igb_media_change: begin");
1573
1574         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1575                 return (EINVAL);
1576
1577         IGB_CORE_LOCK(adapter);
1578         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1579         case IFM_AUTO:
1580                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1581                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1582                 break;
1583         case IFM_1000_LX:
1584         case IFM_1000_SX:
1585         case IFM_1000_T:
1586                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1587                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1588                 break;
1589         case IFM_100_TX:
1590                 adapter->hw.mac.autoneg = FALSE;
1591                 adapter->hw.phy.autoneg_advertised = 0;
1592                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1593                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1594                 else
1595                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1596                 break;
1597         case IFM_10_T:
1598                 adapter->hw.mac.autoneg = FALSE;
1599                 adapter->hw.phy.autoneg_advertised = 0;
1600                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1601                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1602                 else
1603                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1604                 break;
1605         default:
1606                 device_printf(adapter->dev, "Unsupported media type\n");
1607         }
1608
1609         /* As the speed/duplex settings may have changed we need to
1610          * reset the PHY.
1611          */
1612         adapter->hw.phy.reset_disable = FALSE;
1613
1614         igb_init_locked(adapter);
1615         IGB_CORE_UNLOCK(adapter);
1616
1617         return (0);
1618 }
1619
1620
1621 /*********************************************************************
1622  *
1623  *  This routine maps the mbufs to the Advanced TX descriptors
1624  *  used by the 82575-class (igb) adapters.
1625  *  
1626  **********************************************************************/
1627
1628 static int
1629 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1630 {
1631         struct adapter          *adapter = txr->adapter;
1632         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1633         bus_dmamap_t            map;
1634         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1635         union e1000_adv_tx_desc *txd = NULL;
1636         struct mbuf             *m_head;
1637         u32                     olinfo_status = 0, cmd_type_len = 0;
1638         int                     nsegs, i, j, error, first, last = 0;
1639         u32                     hdrlen = 0;
1640
1641         m_head = *m_headp;
1642
1643
1644         /* Set basic descriptor constants */
1645         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1646         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1647         if (m_head->m_flags & M_VLANTAG)
1648                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1649
1650         /*
1651          * Force a cleanup if number of TX descriptors
1652          * available hits the threshold
1653          */
1654         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1655                 igb_txeof(txr);
1656                 /* Do we now have at least the minimum? */
1657                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1658                         txr->no_desc_avail++;
1659                         return (ENOBUFS);
1660                 }
1661         }
1662
1663         /*
1664          * Map the packet for DMA.
1665          *
1666          * Capture the first descriptor index,
1667          * this descriptor will have the index
1668          * of the EOP which is the only one that
1669          * now gets a DONE bit writeback.
1670          */
1671         first = txr->next_avail_desc;
1672         tx_buffer = &txr->tx_buffers[first];
1673         tx_buffer_mapped = tx_buffer;
1674         map = tx_buffer->map;
1675
1676         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1677             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1678
1679         if (error == EFBIG) {
1680                 struct mbuf *m;
1681
1682                 m = m_defrag(*m_headp, M_DONTWAIT);
1683                 if (m == NULL) {
1684                         adapter->mbuf_defrag_failed++;
1685                         m_freem(*m_headp);
1686                         *m_headp = NULL;
1687                         return (ENOBUFS);
1688                 }
1689                 *m_headp = m;
1690
1691                 /* Try it again */
1692                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1693                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1694
1695                 if (error == ENOMEM) {
1696                         adapter->no_tx_dma_setup++;
1697                         return (error);
1698                 } else if (error != 0) {
1699                         adapter->no_tx_dma_setup++;
1700                         m_freem(*m_headp);
1701                         *m_headp = NULL;
1702                         return (error);
1703                 }
1704         } else if (error == ENOMEM) {
1705                 adapter->no_tx_dma_setup++;
1706                 return (error);
1707         } else if (error != 0) {
1708                 adapter->no_tx_dma_setup++;
1709                 m_freem(*m_headp);
1710                 *m_headp = NULL;
1711                 return (error);
1712         }
1713
1714         /* Check again to be sure we have enough descriptors */
1715         if (nsegs > (txr->tx_avail - 2)) {
1716                 txr->no_desc_avail++;
1717                 bus_dmamap_unload(txr->txtag, map);
1718                 return (ENOBUFS);
1719         }
1720         m_head = *m_headp;
1721
1722         /*
1723          * Set up the context descriptor:
1724          * used when any hardware offload is done.
1725          * This includes CSUM, VLAN, and TSO. It
1726          * will use the first descriptor.
1727          */
1728         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1729                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1730                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1731                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1732                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1733                 } else
1734                         return (ENXIO); 
1735         } else if (igb_tx_ctx_setup(txr, m_head))
1736                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1737
1738         /* Calculate payload length */
1739         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1740             << E1000_ADVTXD_PAYLEN_SHIFT);
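        /*
         * Descriptive note: PAYLEN occupies the upper bits of
         * olinfo_status (shifted by E1000_ADVTXD_PAYLEN_SHIFT); for a
         * TSO frame the replicated header (hdrlen) is excluded so the
         * hardware sees only the segmentable payload length.
         */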
1741
1742         /* 82575 needs the queue index added */
1743         if (adapter->hw.mac.type == e1000_82575)
1744                 olinfo_status |= txr->me << 4;
1745
1746         /* Set up our transmit descriptors */
1747         i = txr->next_avail_desc;
1748         for (j = 0; j < nsegs; j++) {
1749                 bus_size_t seg_len;
1750                 bus_addr_t seg_addr;
1751
1752                 tx_buffer = &txr->tx_buffers[i];
1753                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1754                 seg_addr = segs[j].ds_addr;
1755                 seg_len  = segs[j].ds_len;
1756
1757                 txd->read.buffer_addr = htole64(seg_addr);
1758                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1759                 txd->read.olinfo_status = htole32(olinfo_status);
1760                 last = i;
1761                 if (++i == adapter->num_tx_desc)
1762                         i = 0;
1763                 tx_buffer->m_head = NULL;
1764                 tx_buffer->next_eop = -1;
1765         }
1766
1767         txr->next_avail_desc = i;
1768         txr->tx_avail -= nsegs;
1769
1770         tx_buffer->m_head = m_head;
1771         tx_buffer_mapped->map = tx_buffer->map;
1772         tx_buffer->map = map;
1773         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1774
1775         /*
1776          * Last Descriptor of Packet
1777          * needs End Of Packet (EOP)
1778          * and Report Status (RS)
1779          */
1780         txd->read.cmd_type_len |=
1781             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1782         /*
1783          * Keep track in the first buffer which
1784          * descriptor will be written back
1785          */
1786         tx_buffer = &txr->tx_buffers[first];
1787         tx_buffer->next_eop = last;
1788         txr->watchdog_time = ticks;
1789
1790         /*
1791          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1792          * that this frame is available to transmit.
1793          */
1794         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1795             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1796         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1797         ++txr->tx_packets;
1798
1799         return (0);
1800
1801 }
1802
1803 static void
1804 igb_set_promisc(struct adapter *adapter)
1805 {
1806         struct ifnet    *ifp = adapter->ifp;
1807         struct e1000_hw *hw = &adapter->hw;
1808         u32             reg;
1809
1810         if (hw->mac.type == e1000_vfadapt) {
1811                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1812                 return;
1813         }
1814
1815         reg = E1000_READ_REG(hw, E1000_RCTL);
1816         if (ifp->if_flags & IFF_PROMISC) {
1817                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1818                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1819         } else if (ifp->if_flags & IFF_ALLMULTI) {
1820                 reg |= E1000_RCTL_MPE;
1821                 reg &= ~E1000_RCTL_UPE;
1822                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1823         }
1824 }
1825
1826 static void
1827 igb_disable_promisc(struct adapter *adapter)
1828 {
1829         struct e1000_hw *hw = &adapter->hw;
1830         u32             reg;
1831
1832         if (hw->mac.type == e1000_vfadapt) {
1833                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1834                 return;
1835         }
1836         reg = E1000_READ_REG(hw, E1000_RCTL);
1837         reg &=  (~E1000_RCTL_UPE);
1838         reg &=  (~E1000_RCTL_MPE);
1839         E1000_WRITE_REG(hw, E1000_RCTL, reg);
1840 }
1841
1842
1843 /*********************************************************************
1844  *  Multicast Update
1845  *
1846  *  This routine is called whenever multicast address list is updated.
1847  *
1848  **********************************************************************/
1849
1850 static void
1851 igb_set_multi(struct adapter *adapter)
1852 {
1853         struct ifnet    *ifp = adapter->ifp;
1854         struct ifmultiaddr *ifma;
1855         u32 reg_rctl = 0;
1856         u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
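        /*
         * mta is a flat array: up to MAX_NUM_MULTICAST_ADDRESSES
         * 6-byte (ETH_ADDR_LEN) entries packed back to back, handed
         * to e1000_update_mc_addr_list() below.
         */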
1857
1858         int mcnt = 0;
1859
1860         IOCTL_DEBUGOUT("igb_set_multi: begin");
1861
1862 #if __FreeBSD_version < 800000
1863         IF_ADDR_LOCK(ifp);
1864 #else
1865         if_maddr_rlock(ifp);
1866 #endif
1867         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1868                 if (ifma->ifma_addr->sa_family != AF_LINK)
1869                         continue;
1870
1871                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1872                         break;
1873
1874                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1875                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1876                 mcnt++;
1877         }
1878 #if __FreeBSD_version < 800000
1879         IF_ADDR_UNLOCK(ifp);
1880 #else
1881         if_maddr_runlock(ifp);
1882 #endif
1883
1884         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1885                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1886                 reg_rctl |= E1000_RCTL_MPE;
1887                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1888         } else
1889                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1890 }
1891
1892
1893 /*********************************************************************
1894  *  Timer routine:
1895  *      This routine checks for link status,
1896  *      updates statistics, and does the watchdog.
1897  *
1898  **********************************************************************/
1899
1900 static void
1901 igb_local_timer(void *arg)
1902 {
1903         struct adapter          *adapter = arg;
1904         device_t                dev = adapter->dev;
1905         struct tx_ring          *txr = adapter->tx_rings;
1906
1907
1908         IGB_CORE_LOCK_ASSERT(adapter);
1909
1910         igb_update_link_status(adapter);
1911         igb_update_stats_counters(adapter);
1912
1913         /*
1914         ** Watchdog: check for time since any descriptor was cleaned
1915         */
1916         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1917                 if (txr->watchdog_check == FALSE)
1918                         continue;
1919                 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1920                         goto timeout;
1921         }
1922
1923         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1924         return;
1925
1926 timeout:
1927         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1928         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1929             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1930             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1931         device_printf(dev, "TX(%d) desc avail = %d, "
1932             "Next TX to Clean = %d\n",
1933             txr->me, txr->tx_avail, txr->next_to_clean);
1934         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1935         adapter->watchdog_events++;
1936         igb_init_locked(adapter);
1937 }
1938
1939 static void
1940 igb_update_link_status(struct adapter *adapter)
1941 {
1942         struct e1000_hw *hw = &adapter->hw;
1943         struct ifnet *ifp = adapter->ifp;
1944         device_t dev = adapter->dev;
1945         struct tx_ring *txr = adapter->tx_rings;
1946         u32 link_check = 0;
1947
1948         /* Get the cached link value or read for real */
1949         switch (hw->phy.media_type) {
1950         case e1000_media_type_copper:
1951                 if (hw->mac.get_link_status) {
1952                         /* Do the work to read phy */
1953                         e1000_check_for_link(hw);
1954                         link_check = !hw->mac.get_link_status;
1955                 } else
1956                         link_check = TRUE;
1957                 break;
1958         case e1000_media_type_fiber:
1959                 e1000_check_for_link(hw);
1960                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1961                                  E1000_STATUS_LU);
1962                 break;
1963         case e1000_media_type_internal_serdes:
1964                 e1000_check_for_link(hw);
1965                 link_check = adapter->hw.mac.serdes_has_link;
1966                 break;
1967         /* VF device is type_unknown */
1968         case e1000_media_type_unknown:
1969                 e1000_check_for_link(hw);
1970                 link_check = !hw->mac.get_link_status;
1971                 /* Fall thru */
1972         default:
1973                 break;
1974         }
1975
1976         /* Now we check if a transition has happened */
1977         if (link_check && (adapter->link_active == 0)) {
1978                 e1000_get_speed_and_duplex(&adapter->hw, 
1979                     &adapter->link_speed, &adapter->link_duplex);
1980                 if (bootverbose)
1981                         device_printf(dev, "Link is up %d Mbps %s\n",
1982                             adapter->link_speed,
1983                             ((adapter->link_duplex == FULL_DUPLEX) ?
1984                             "Full Duplex" : "Half Duplex"));
1985                 adapter->link_active = 1;
1986                 ifp->if_baudrate = adapter->link_speed * 1000000;
1987                 /* This can sleep */
1988                 if_link_state_change(ifp, LINK_STATE_UP);
1989         } else if (!link_check && (adapter->link_active == 1)) {
1990                 ifp->if_baudrate = adapter->link_speed = 0;
1991                 adapter->link_duplex = 0;
1992                 if (bootverbose)
1993                         device_printf(dev, "Link is Down\n");
1994                 adapter->link_active = 0;
1995                 /* This can sleep */
1996                 if_link_state_change(ifp, LINK_STATE_DOWN);
1997                 /* Turn off watchdogs */
1998                 for (int i = 0; i < adapter->num_queues; i++, txr++)
1999                         txr->watchdog_check = FALSE;
2000         }
2001 }
2002
2003 /*********************************************************************
2004  *
2005  *  This routine disables all traffic on the adapter by issuing a
2006  *  global reset on the MAC and deallocates TX/RX buffers.
2007  *
2008  **********************************************************************/
2009
2010 static void
2011 igb_stop(void *arg)
2012 {
2013         struct adapter  *adapter = arg;
2014         struct ifnet    *ifp = adapter->ifp;
2015         struct tx_ring *txr = adapter->tx_rings;
2016
2017         IGB_CORE_LOCK_ASSERT(adapter);
2018
2019         INIT_DEBUGOUT("igb_stop: begin");
2020
2021         igb_disable_intr(adapter);
2022
2023         callout_stop(&adapter->timer);
2024
2025         /* Tell the stack that the interface is no longer active */
2026         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2027
2028         /* Unarm watchdog timer. */
2029         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2030                 IGB_TX_LOCK(txr);
2031                 txr->watchdog_check = FALSE;
2032                 IGB_TX_UNLOCK(txr);
2033         }
2034
2035         e1000_reset_hw(&adapter->hw);
2036         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2037
2038         e1000_led_off(&adapter->hw);
2039         e1000_cleanup_led(&adapter->hw);
2040 }
2041
2042
2043 /*********************************************************************
2044  *
2045  *  Identify the hardware: save PCI IDs and set the MAC type.
2046  *
2047  **********************************************************************/
2048 static void
2049 igb_identify_hardware(struct adapter *adapter)
2050 {
2051         device_t dev = adapter->dev;
2052
2053         /* Make sure our PCI config space has the necessary stuff set */
2054         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2055         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2056             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2057                 INIT_DEBUGOUT("Memory Access and/or Bus Master "
2058                     "bits were not set!\n");
2059                 adapter->hw.bus.pci_cmd_word |=
2060                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2061                 pci_write_config(dev, PCIR_COMMAND,
2062                     adapter->hw.bus.pci_cmd_word, 2);
2063         }
2064
2065         /* Save off the information about this board */
2066         adapter->hw.vendor_id = pci_get_vendor(dev);
2067         adapter->hw.device_id = pci_get_device(dev);
2068         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2069         adapter->hw.subsystem_vendor_id =
2070             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2071         adapter->hw.subsystem_device_id =
2072             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2073
2074         /* Set MAC type early for PCI setup */
2075         e1000_set_mac_type(&adapter->hw);
2076 }
2077
2078 static int
2079 igb_allocate_pci_resources(struct adapter *adapter)
2080 {
2081         device_t        dev = adapter->dev;
2082         int             rid;
2083
2084         rid = PCIR_BAR(0);
2085         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2086             &rid, RF_ACTIVE);
2087         if (adapter->pci_mem == NULL) {
2088                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2089                 return (ENXIO);
2090         }
2091         adapter->osdep.mem_bus_space_tag =
2092             rman_get_bustag(adapter->pci_mem);
2093         adapter->osdep.mem_bus_space_handle =
2094             rman_get_bushandle(adapter->pci_mem);
2095         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2096
2097         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2098
2099         /* This will setup either MSI/X or MSI */
2100         adapter->msix = igb_setup_msix(adapter);
2101         adapter->hw.back = &adapter->osdep;
2102
2103         return (0);
2104 }
2105
2106 /*********************************************************************
2107  *
2108  *  Set up the Legacy or MSI interrupt handler
2109  *
2110  **********************************************************************/
2111 static int
2112 igb_allocate_legacy(struct adapter *adapter)
2113 {
2114         device_t                dev = adapter->dev;
2115         struct igb_queue        *que = adapter->queues;
2116         int                     error, rid = 0;
2117
2118         /* Turn off all interrupts */
2119         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2120
2121         /* MSI RID is 1 */
2122         if (adapter->msix == 1)
2123                 rid = 1;
2124
2125         /* We allocate a single interrupt resource */
2126         adapter->res = bus_alloc_resource_any(dev,
2127             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2128         if (adapter->res == NULL) {
2129                 device_printf(dev, "Unable to allocate bus resource: "
2130                     "interrupt\n");
2131                 return (ENXIO);
2132         }
2133
2134         /*
2135          * Try allocating a fast interrupt and the associated deferred
2136          * processing contexts.
2137          */
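        /*
         * Descriptive note: igb_irq_fast is installed as a filter, so it
         * runs with minimal latency in interrupt context and defers the
         * actual RX/TX work (que_task) and link handling (link_task) to
         * the taskqueue created below.
         */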
2138         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2139         /* Make tasklet for deferred link handling */
2140         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2141         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2142             taskqueue_thread_enqueue, &que->tq);
2143         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2144             device_get_nameunit(adapter->dev));
2145         if ((error = bus_setup_intr(dev, adapter->res,
2146             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2147             adapter, &adapter->tag)) != 0) {
2148                 device_printf(dev, "Failed to register fast interrupt "
2149                             "handler: %d\n", error);
2150                 taskqueue_free(que->tq);
2151                 que->tq = NULL;
2152                 return (error);
2153         }
2154
2155         return (0);
2156 }
2157
2158
2159 /*********************************************************************
2160  *
2161  *  Set up the MSIX Queue Interrupt handlers
2162  *
2163  **********************************************************************/
2164 static int
2165 igb_allocate_msix(struct adapter *adapter)
2166 {
2167         device_t                dev = adapter->dev;
2168         struct igb_queue        *que = adapter->queues;
2169         int                     error, rid, vector = 0;
2170
2171
2172         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2173                 rid = vector + 1;
2174                 que->res = bus_alloc_resource_any(dev,
2175                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2176                 if (que->res == NULL) {
2177                         device_printf(dev,
2178                             "Unable to allocate bus resource: "
2179                             "MSIX Queue Interrupt\n");
2180                         return (ENXIO);
2181                 }
2182                 error = bus_setup_intr(dev, que->res,
2183                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2184                     igb_msix_que, que, &que->tag);
2185                 if (error) {
2186                         que->res = NULL;
2187                         device_printf(dev, "Failed to register Queue handler");
2188                         return (error);
2189                 }
2190 #if __FreeBSD_version >= 800504
2191                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2192 #endif
2193                 que->msix = vector;
2194                 if (adapter->hw.mac.type == e1000_82575)
2195                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2196                 else
2197                         que->eims = 1 << vector;
2198                 /*
2199                 ** Bind the msix vector, and thus the
2200                 ** rings to the corresponding cpu.
2201                 */
2202                 if (adapter->num_queues > 1)
2203                         bus_bind_intr(dev, que->res, i);
2204                 /* Make tasklet for deferred handling */
2205                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2206                 que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2207                     taskqueue_thread_enqueue, &que->tq);
2208                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2209                     device_get_nameunit(adapter->dev));
2210         }
2211
2212         /* And Link */
2213         rid = vector + 1;
2214         adapter->res = bus_alloc_resource_any(dev,
2215             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2216         if (adapter->res == NULL) {
2217                 device_printf(dev,
2218                     "Unable to allocate bus resource: "
2219                     "MSIX Link Interrupt\n");
2220                 return (ENXIO);
2221         }
2222         if ((error = bus_setup_intr(dev, adapter->res,
2223             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2224             igb_msix_link, adapter, &adapter->tag)) != 0) {
2225                 device_printf(dev, "Failed to register Link handler");
2226                 return (error);
2227         }
2228 #if __FreeBSD_version >= 800504
2229         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2230 #endif
2231         adapter->linkvec = vector;
2232
2233         return (0);
2234 }
2235
2236
2237 static void
2238 igb_configure_queues(struct adapter *adapter)
2239 {
2240         struct  e1000_hw        *hw = &adapter->hw;
2241         struct  igb_queue       *que;
2242         u32                     tmp, ivar = 0;
2243         u32                     newitr = IGB_DEFAULT_ITR;
2244
2245         /* First enable MSIX mode via GPIE (82576 and later) */
2246         if (adapter->hw.mac.type > e1000_82575)
2247                 E1000_WRITE_REG(hw, E1000_GPIE,
2248                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2249                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2250
2251         /* Turn on MSIX */
2252         switch (adapter->hw.mac.type) {
2253         case e1000_82580:
2254         case e1000_vfadapt:
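                /*
                ** IVAR layout on these parts, as implied by the masks
                ** below: each 32-bit IVAR register holds four 8-bit
                ** entries, low byte to high: RX(2n), TX(2n), RX(2n+1),
                ** TX(2n+1); each entry is the MSIX vector number with
                ** E1000_IVAR_VALID set.
                */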
2255                 /* RX entries */
2256                 for (int i = 0; i < adapter->num_queues; i++) {
2257                         u32 index = i >> 1;
2258                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2259                         que = &adapter->queues[i];
2260                         if (i & 1) {
2261                                 ivar &= 0xFF00FFFF;
2262                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2263                         } else {
2264                                 ivar &= 0xFFFFFF00;
2265                                 ivar |= que->msix | E1000_IVAR_VALID;
2266                         }
2267                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2268                 }
2269                 /* TX entries */
2270                 for (int i = 0; i < adapter->num_queues; i++) {
2271                         u32 index = i >> 1;
2272                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2273                         que = &adapter->queues[i];
2274                         if (i & 1) {
2275                                 ivar &= 0x00FFFFFF;
2276                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2277                         } else {
2278                                 ivar &= 0xFFFF00FF;
2279                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2280                         }
2281                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2282                         adapter->eims_mask |= que->eims;
2283                 }
2284
2285                 /* And for the link interrupt */
2286                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2287                 adapter->link_mask = 1 << adapter->linkvec;
2288                 adapter->eims_mask |= adapter->link_mask;
2289                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2290                 break;
2291         case e1000_82576:
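                /*
                ** IVAR layout here, as implied by the masks below:
                ** IVAR[i & 7] packs queue i and queue i+8, low byte to
                ** high: RX(i), TX(i), RX(i+8), TX(i+8), each entry the
                ** MSIX vector number with E1000_IVAR_VALID set.
                */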
2292                 /* RX entries */
2293                 for (int i = 0; i < adapter->num_queues; i++) {
2294                         u32 index = i & 0x7; /* Each IVAR has two entries */
2295                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2296                         que = &adapter->queues[i];
2297                         if (i < 8) {
2298                                 ivar &= 0xFFFFFF00;
2299                                 ivar |= que->msix | E1000_IVAR_VALID;
2300                         } else {
2301                                 ivar &= 0xFF00FFFF;
2302                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2303                         }
2304                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2305                         adapter->eims_mask |= que->eims;
2306                 }
2307                 /* TX entries */
2308                 for (int i = 0; i < adapter->num_queues; i++) {
2309                         u32 index = i & 0x7; /* Each IVAR has two entries */
2310                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2311                         que = &adapter->queues[i];
2312                         if (i < 8) {
2313                                 ivar &= 0xFFFF00FF;
2314                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2315                         } else {
2316                                 ivar &= 0x00FFFFFF;
2317                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2318                         }
2319                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2320                         adapter->eims_mask |= que->eims;
2321                 }
2322
2323                 /* And for the link interrupt */
2324                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2325                 adapter->link_mask = 1 << adapter->linkvec;
2326                 adapter->eims_mask |= adapter->link_mask;
2327                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2328                 break;
2329
2330         case e1000_82575:
2331                 /* enable MSI-X support */
2332                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2333                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2334                 /* Auto-Mask interrupts upon ICR read. */
2335                 tmp |= E1000_CTRL_EXT_EIAME;
2336                 tmp |= E1000_CTRL_EXT_IRCA;
2337                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2338
2339                 /* Queues */
2340                 for (int i = 0; i < adapter->num_queues; i++) {
2341                         que = &adapter->queues[i];
2342                         tmp = E1000_EICR_RX_QUEUE0 << i;
2343                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2344                         que->eims = tmp;
2345                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2346                             i, que->eims);
2347                         adapter->eims_mask |= que->eims;
2348                 }
2349
2350                 /* Link */
2351                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2352                     E1000_EIMS_OTHER);
2353                 adapter->link_mask |= E1000_EIMS_OTHER;
2354                 adapter->eims_mask |= adapter->link_mask;
2355         default:
2356                 break;
2357         }
2358
2359         /* Set the starting interrupt rate */
2360         if (hw->mac.type == e1000_82575)
2361                 newitr |= newitr << 16;
2362         else
2363                 newitr |= E1000_EITR_CNT_IGNR;
2364
2365         for (int i = 0; i < adapter->num_queues; i++) {
2366                 que = &adapter->queues[i];
2367                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2368         }
2369
2370         return;
2371 }
2372
2373
2374 static void
2375 igb_free_pci_resources(struct adapter *adapter)
2376 {
2377         struct          igb_queue *que = adapter->queues;
2378         device_t        dev = adapter->dev;
2379         int             rid;
2380
2381         /*
2382         ** There is a slight possibility of a failure mode
2383         ** in attach that will result in entering this function
2384         ** before interrupt resources have been initialized, and
2385         ** in that case we do not want to execute the loops below.
2386         ** We can detect this reliably by the state of the adapter
2387         ** res pointer.
2388         */
2389         if (adapter->res == NULL)
2390                 goto mem;
2391
2392         /*
2393          * First release all the interrupt resources:
2394          */
2395         for (int i = 0; i < adapter->num_queues; i++, que++) {
2396                 rid = que->msix + 1;
2397                 if (que->tag != NULL) {
2398                         bus_teardown_intr(dev, que->res, que->tag);
2399                         que->tag = NULL;
2400                 }
2401                 if (que->res != NULL)
2402                         bus_release_resource(dev,
2403                             SYS_RES_IRQ, rid, que->res);
2404         }
2405
2406         /* Clean the Legacy or Link interrupt last */
2407         if (adapter->linkvec) /* we are doing MSIX */
2408                 rid = adapter->linkvec + 1;
2409         else
2410                 rid = (adapter->msix != 0) ? 1 : 0;
2411
2412         if (adapter->tag != NULL) {
2413                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2414                 adapter->tag = NULL;
2415         }
2416         if (adapter->res != NULL)
2417                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2418
2419 mem:
2420         if (adapter->msix)
2421                 pci_release_msi(dev);
2422
2423         if (adapter->msix_mem != NULL)
2424                 bus_release_resource(dev, SYS_RES_MEMORY,
2425                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2426
2427         if (adapter->pci_mem != NULL)
2428                 bus_release_resource(dev, SYS_RES_MEMORY,
2429                     PCIR_BAR(0), adapter->pci_mem);
2430
2431 }
2432
2433 /*
2434  * Set up either MSIX or MSI
2435  */
2436 static int
2437 igb_setup_msix(struct adapter *adapter)
2438 {
2439         device_t dev = adapter->dev;
2440         int rid, want, queues, msgs;
2441
2442         /* tuneable override */
2443         if (igb_enable_msix == 0)
2444                 goto msi;
2445
2446         /* First try MSI/X */
2447         rid = PCIR_BAR(IGB_MSIX_BAR);
2448         adapter->msix_mem = bus_alloc_resource_any(dev,
2449             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2450         if (!adapter->msix_mem) {
2451                 /* May not be enabled */
2452                 device_printf(adapter->dev,
2453             "Unable to map MSIX table\n");
2454                 goto msi;
2455         }
2456
2457         msgs = pci_msix_count(dev); 
2458         if (msgs == 0) { /* system has msix disabled */
2459                 bus_release_resource(dev, SYS_RES_MEMORY,
2460                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2461                 adapter->msix_mem = NULL;
2462                 goto msi;
2463         }
2464
2465         /* Figure out a reasonable auto config value */
2466         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2467
2468         /* Manual override */
2469         if (igb_num_queues != 0)
2470                 queues = igb_num_queues;
2471
2472         /* Can have max of 4 queues on 82575 */
2473         if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2474                 queues = 4;
2475
2476         /* Limit the VF adapter to one queue */
2477         if ((adapter->hw.mac.type == e1000_vfadapt) && (queues > 1))
2478                 queues = 1;
2479
2480         /*
2481         ** One vector (RX/TX pair) per queue
2482         ** plus an additional for Link interrupt
2483         */
2484         want = queues + 1;
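        /*
        ** Illustrative example: with 4 CPUs and 10 MSIX messages
        ** available, queues = 4 and want = 5 (four queue vectors plus
        ** the link vector); with only 3 messages, queues = 2 and
        ** want = 3.
        */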
2485         if (msgs >= want)
2486                 msgs = want;
2487         else {
2488                 device_printf(adapter->dev,
2489                     "MSIX Configuration Problem, "
2490                     "%d vectors configured, but %d queues wanted!\n",
2491                     msgs, want);
2492                 return (ENXIO);
2493         }
2494         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2495                 device_printf(adapter->dev,
2496                     "Using MSIX interrupts with %d vectors\n", msgs);
2497                 adapter->num_queues = queues;
2498                 return (msgs);
2499         }
2500 msi:
2501         msgs = pci_msi_count(dev);
2502         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2503                 device_printf(adapter->dev,"Using MSI interrupt\n");
2504         return (msgs);
2505 }
2506
2507 /*********************************************************************
2508  *
2509  *  Set up a fresh starting state
2510  *
2511  **********************************************************************/
2512 static void
2513 igb_reset(struct adapter *adapter)
2514 {
2515         device_t        dev = adapter->dev;
2516         struct e1000_hw *hw = &adapter->hw;
2517         struct e1000_fc_info *fc = &hw->fc;
2518         struct ifnet    *ifp = adapter->ifp;
2519         u32             pba = 0;
2520         u16             hwm;
2521
2522         INIT_DEBUGOUT("igb_reset: begin");
2523
2524         /* Let the firmware know the OS is in control */
2525         igb_get_hw_control(adapter);
2526
2527         /*
2528          * Packet Buffer Allocation (PBA)
2529          * Writing PBA sets the receive portion of the buffer;
2530          * the remainder is used for the transmit buffer.
2531          */
2532         switch (hw->mac.type) {
2533         case e1000_82575:
2534                 pba = E1000_PBA_32K;
2535                 break;
2536         case e1000_82576:
2537         case e1000_vfadapt:
2538                 pba = E1000_PBA_64K;
2539                 break;
2540         case e1000_82580:
2541                 pba = E1000_PBA_35K;
2542         default:
2543                 break;
2544         }
2545
2546         /* Special needs in case of Jumbo frames */
2547         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2548                 u32 tx_space, min_tx, min_rx;
2549                 pba = E1000_READ_REG(hw, E1000_PBA);
2550                 tx_space = pba >> 16;
2551                 pba &= 0xffff;
2552                 min_tx = (adapter->max_frame_size +
2553                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2554                 min_tx = roundup2(min_tx, 1024);
2555                 min_tx >>= 10;
2556                 min_rx = adapter->max_frame_size;
2557                 min_rx = roundup2(min_rx, 1024);
2558                 min_rx >>= 10;
2559                 if (tx_space < min_tx &&
2560                     ((min_tx - tx_space) < pba)) {
2561                         pba = pba - (min_tx - tx_space);
2562                         /*
2563                          * if short on rx space, rx wins
2564                          * and must trump tx adjustment
2565                          */
2566                         if (pba < min_rx)
2567                                 pba = min_rx;
2568                 }
2569                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2570         }
2571
2572         INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2573
2574         /*
2575          * These parameters control the automatic generation (Tx) and
2576          * response (Rx) to Ethernet PAUSE frames.
2577          * - High water mark should allow for at least two frames to be
2578          *   received after sending an XOFF.
2579          * - Low water mark works best when it is very near the high water mark.
2580          *   This allows the receiver to restart by sending XON when it has
2581          *   drained a bit.
2582          */
2583         hwm = min(((pba << 10) * 9 / 10),
2584             ((pba << 10) - 2 * adapter->max_frame_size));
2585
2586         if (hw->mac.type < e1000_82576) {
2587                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2588                 fc->low_water = fc->high_water - 8;
2589         } else {
2590                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2591                 fc->low_water = fc->high_water - 16;
2592         }
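        /*
         * Illustrative example (assuming an 82575 with pba = 32KB and a
         * 1518-byte max frame): hwm = min(32768 * 9 / 10, 32768 - 2 * 1518)
         * = min(29491, 29732) = 29491, so high_water = 29491 & 0xFFF8
         * = 29488 and low_water = 29480.
         */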
2593
2594         fc->pause_time = IGB_FC_PAUSE_TIME;
2595         fc->send_xon = TRUE;
2596
2597         /* Set flow control, using the tunable value if sane */
2598         if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2599                 fc->requested_mode = igb_fc_setting;
2600         else
2601                 fc->requested_mode = e1000_fc_none;
2602
2603         fc->current_mode = fc->requested_mode;
2604
2605         /* Issue a global reset */
2606         e1000_reset_hw(hw);
2607         E1000_WRITE_REG(hw, E1000_WUC, 0);
2608
2609         if (e1000_init_hw(hw) < 0)
2610                 device_printf(dev, "Hardware Initialization Failed\n");
2611
2612         if (hw->mac.type == e1000_82580) {
2613                 u32 reg;
2614
2615                 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2616                 /*
2617                  * 0x80000000 - enable DMA COAL
2618                  * 0x10000000 - use L0s as low power
2619                  * 0x20000000 - use L1 as low power
2620                  * X << 16 - exit dma coal when rx data exceeds X kB
2621                  * Y - upper limit to stay in dma coal in units of 32usecs
2622                  */
2623                 E1000_WRITE_REG(hw, E1000_DMACR,
2624                     0xA0000006 | ((hwm << 6) & 0x00FF0000));
2625
2626                 /* set hwm to PBA - 2 * max frame size */
2627                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2628                 /*
2629                  * This sets the time to wait before requesting transition to
2630                  * low power state to number of usecs needed to receive 1 512
2631                  * byte frame at gigabit line rate
2632                  */
2633                 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2634
2635                 /* free space in tx packet buffer to wake from DMA coal */
2636                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2637                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2638
2639                 /* make low power state decision controlled by DMA coal */
2640                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2641                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2642                     reg | E1000_PCIEMISC_LX_DECISION);
2643         }
2644
2645         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2646         e1000_get_phy_info(hw);
2647         e1000_check_for_link(hw);
2648         return;
2649 }
2650
2651 /*********************************************************************
2652  *
2653  *  Setup networking device structure and register an interface.
2654  *
2655  **********************************************************************/
2656 static void
2657 igb_setup_interface(device_t dev, struct adapter *adapter)
2658 {
2659         struct ifnet   *ifp;
2660
2661         INIT_DEBUGOUT("igb_setup_interface: begin");
2662
2663         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2664         if (ifp == NULL)
2665                 panic("%s: can not if_alloc()", device_get_nameunit(dev));
2666         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2667         ifp->if_mtu = ETHERMTU;
2668         ifp->if_init =  igb_init;
2669         ifp->if_softc = adapter;
2670         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2671         ifp->if_ioctl = igb_ioctl;
2672         ifp->if_start = igb_start;
2673 #if __FreeBSD_version >= 800000
2674         ifp->if_transmit = igb_mq_start;
2675         ifp->if_qflush = igb_qflush;
2676 #endif
2677         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2678         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2679         IFQ_SET_READY(&ifp->if_snd);
2680
2681         ether_ifattach(ifp, adapter->hw.mac.addr);
2682
2683         ifp->if_capabilities = ifp->if_capenable = 0;
2684
2685         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2686         ifp->if_capabilities |= IFCAP_TSO4;
2687         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2688         if (igb_header_split)
2689                 ifp->if_capabilities |= IFCAP_LRO;
2690
2691         ifp->if_capenable = ifp->if_capabilities;
2692 #ifdef DEVICE_POLLING
2693         ifp->if_capabilities |= IFCAP_POLLING;
2694 #endif
2695
2696         /*
2697          * Tell the upper layer(s) we
2698          * support full VLAN capability.
2699          */
2700         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2701         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2702         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2703
2704         /*
2705         ** Don't turn this on by default; if vlans are
2706         ** created on another pseudo device (eg. lagg)
2707         ** then vlan events are not passed through, breaking
2708         ** operation, but with HW FILTER off it works. If
2709         ** you are using vlans directly on the igb driver you
2710         ** can enable this and get full hardware tag filtering.
2711         */
2712         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2713
2714         /*
2715          * Specify the media types supported by this adapter and register
2716          * callbacks to update media and link information
2717          */
2718         ifmedia_init(&adapter->media, IFM_IMASK,
2719             igb_media_change, igb_media_status);
2720         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2721             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2722                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2723                             0, NULL);
2724                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2725         } else {
2726                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2727                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2728                             0, NULL);
2729                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2730                             0, NULL);
2731                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2732                             0, NULL);
2733                 if (adapter->hw.phy.type != e1000_phy_ife) {
2734                         ifmedia_add(&adapter->media,
2735                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2736                         ifmedia_add(&adapter->media,
2737                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2738                 }
2739         }
2740         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2741         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2742 }
2743
2744
2745 /*
2746  * Manage DMA'able memory.
2747  */
2748 static void
2749 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2750 {
2751         if (error)
2752                 return;
2753         *(bus_addr_t *) arg = segs[0].ds_addr;
2754 }
2755
2756 static int
2757 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2758         struct igb_dma_alloc *dma, int mapflags)
2759 {
2760         int error;
2761
2762         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2763                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2764                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2765                                 BUS_SPACE_MAXADDR,      /* highaddr */
2766                                 NULL, NULL,             /* filter, filterarg */
2767                                 size,                   /* maxsize */
2768                                 1,                      /* nsegments */
2769                                 size,                   /* maxsegsize */
2770                                 0,                      /* flags */
2771                                 NULL,                   /* lockfunc */
2772                                 NULL,                   /* lockarg */
2773                                 &dma->dma_tag);
2774         if (error) {
2775                 device_printf(adapter->dev,
2776                     "%s: bus_dma_tag_create failed: %d\n",
2777                     __func__, error);
2778                 goto fail_0;
2779         }
2780
2781         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2782             BUS_DMA_NOWAIT, &dma->dma_map);
2783         if (error) {
2784                 device_printf(adapter->dev,
2785                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2786                     __func__, (uintmax_t)size, error);
2787                 goto fail_2;
2788         }
2789
2790         dma->dma_paddr = 0;
2791         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2792             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2793         if (error || dma->dma_paddr == 0) {
2794                 device_printf(adapter->dev,
2795                     "%s: bus_dmamap_load failed: %d\n",
2796                     __func__, error);
2797                 goto fail_3;
2798         }
2799
2800         return (0);
2801
2802 fail_3:
2803         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2804 fail_2:
2805         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2806         bus_dma_tag_destroy(dma->dma_tag);
2807 fail_0:
2808         dma->dma_map = NULL;
2809         dma->dma_tag = NULL;
2810
2811         return (error);
2812 }
2813
2814 static void
2815 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2816 {
2817         if (dma->dma_tag == NULL)
2818                 return;
2819         if (dma->dma_map != NULL) {
2820                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2821                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2822                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2823                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2824                 dma->dma_map = NULL;
2825         }
2826         bus_dma_tag_destroy(dma->dma_tag);
2827         dma->dma_tag = NULL;
2828 }
2829
2830
2831 /*********************************************************************
2832  *
2833  *  Allocate memory for the transmit and receive rings, and then
2834  *  the descriptors associated with each, called only once at attach.
2835  *
2836  **********************************************************************/
2837 static int
2838 igb_allocate_queues(struct adapter *adapter)
2839 {
2840         device_t dev = adapter->dev;
2841         struct igb_queue        *que = NULL;
2842         struct tx_ring          *txr = NULL;
2843         struct rx_ring          *rxr = NULL;
2844         int rsize, tsize, error = E1000_SUCCESS;
2845         int txconf = 0, rxconf = 0;
2846
2847         /* First allocate the top level queue structs */
2848         if (!(adapter->queues =
2849             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2850             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2851                 device_printf(dev, "Unable to allocate queue memory\n");
2852                 error = ENOMEM;
2853                 goto fail;
2854         }
2855
2856         /* Next allocate the TX ring struct memory */
2857         if (!(adapter->tx_rings =
2858             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2859             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2860                 device_printf(dev, "Unable to allocate TX ring memory\n");
2861                 error = ENOMEM;
2862                 goto tx_fail;
2863         }
2864
2865         /* Now allocate the RX */
2866         if (!(adapter->rx_rings =
2867             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2868             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2869                 device_printf(dev, "Unable to allocate RX ring memory\n");
2870                 error = ENOMEM;
2871                 goto rx_fail;
2872         }
2873
2874         tsize = roundup2(adapter->num_tx_desc *
2875             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2876         /*
2877          * Now set up the TX queues; txconf tracks how many have been
2878          * configured so that, if setup fails midway, we can unwind
2879          * the completed allocations gracefully.
2880          */
2881         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2882                 /* Set up some basics */
2883                 txr = &adapter->tx_rings[i];
2884                 txr->adapter = adapter;
2885                 txr->me = i;
2886
2887                 /* Initialize the TX lock */
2888                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2889                     device_get_nameunit(dev), txr->me);
2890                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2891
2892                 if (igb_dma_malloc(adapter, tsize,
2893                         &txr->txdma, BUS_DMA_NOWAIT)) {
2894                         device_printf(dev,
2895                             "Unable to allocate TX Descriptor memory\n");
2896                         error = ENOMEM;
2897                         goto err_tx_desc;
2898                 }
2899                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2900                 bzero((void *)txr->tx_base, tsize);
2901
2902                 /* Now allocate transmit buffers for the ring */
2903                 if (igb_allocate_transmit_buffers(txr)) {
2904                         device_printf(dev,
2905                             "Critical Failure setting up transmit buffers\n");
2906                         error = ENOMEM;
2907                         goto err_tx_desc;
2908                 }
2909 #if __FreeBSD_version >= 800000
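                     /*
                     ** The buf_ring is a per-queue staging queue used by
                     ** the multiqueue transmit path (if_transmit), so the
                     ** stack can hand packets to each TX queue without
                     ** serializing behind a single device lock.
                     */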
2910                 /* Allocate a buf ring */
2911                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2912                     M_WAITOK, &txr->tx_mtx);
2913 #endif
2914         }
2915
2916         /*
2917          * Next the RX queues...
2918          */ 
2919         rsize = roundup2(adapter->num_rx_desc *
2920             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2921         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2922                 rxr = &adapter->rx_rings[i];
2923                 rxr->adapter = adapter;
2924                 rxr->me = i;
2925
2926                 /* Initialize the RX lock */
2927                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2928                     device_get_nameunit(dev), rxr->me);
2929                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2930
2931                 if (igb_dma_malloc(adapter, rsize,
2932                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2933                         device_printf(dev,
2934                             "Unable to allocate RxDescriptor memory\n");
2935                         error = ENOMEM;
2936                         goto err_rx_desc;
2937                 }
2938                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2939                 bzero((void *)rxr->rx_base, rsize);
2940
2941                 /* Allocate receive buffers for the ring*/
2942                 if (igb_allocate_receive_buffers(rxr)) {
2943                         device_printf(dev,
2944                             "Critical Failure setting up receive buffers\n");
2945                         error = ENOMEM;
2946                         goto err_rx_desc;
2947                 }
2948         }
2949
2950         /*
2951         ** Finally set up the queue holding structs
2952         */
2953         for (int i = 0; i < adapter->num_queues; i++) {
2954                 que = &adapter->queues[i];
2955                 que->adapter = adapter;
2956                 que->txr = &adapter->tx_rings[i];
2957                 que->rxr = &adapter->rx_rings[i];
2958         }
2959
2960         return (0);
2961
2962 err_rx_desc:
2963         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2964                 igb_dma_free(adapter, &rxr->rxdma);
2965 err_tx_desc:
2966         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2967                 igb_dma_free(adapter, &txr->txdma);
2968         free(adapter->rx_rings, M_DEVBUF);
2969 rx_fail:
2970 #if __FreeBSD_version >= 800000
2971         if (txr != NULL && txr->br != NULL)
                     buf_ring_free(txr->br, M_DEVBUF);
2972 #endif
2973         free(adapter->tx_rings, M_DEVBUF);
2974 tx_fail:
2975         free(adapter->queues, M_DEVBUF);
2976 fail:
2977         return (error);
2978 }
2979
2980 /*********************************************************************
2981  *
2982  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2983  *  the information needed to transmit a packet on the wire. This is
2984  *  called only once at attach, setup is done every reset.
2985  *
2986  **********************************************************************/
2987 static int
2988 igb_allocate_transmit_buffers(struct tx_ring *txr)
2989 {
2990         struct adapter *adapter = txr->adapter;
2991         device_t dev = adapter->dev;
2992         struct igb_tx_buffer *txbuf;
2993         int error, i;
2994
2995         /*
2996          * Setup DMA descriptor areas.
2997          */
2998         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2999                                1, 0,                    /* alignment, bounds */
3000                                BUS_SPACE_MAXADDR,       /* lowaddr */
3001                                BUS_SPACE_MAXADDR,       /* highaddr */
3002                                NULL, NULL,              /* filter, filterarg */
3003                                IGB_TSO_SIZE,            /* maxsize */
3004                                IGB_MAX_SCATTER,         /* nsegments */
3005                                PAGE_SIZE,               /* maxsegsize */
3006                                0,                       /* flags */
3007                                NULL,                    /* lockfunc */
3008                                NULL,                    /* lockfuncarg */
3009                                &txr->txtag))) {
3010                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3011                 goto fail;
3012         }
3013
3014         if (!(txr->tx_buffers =
3015             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3016             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3017                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3018                 error = ENOMEM;
3019                 goto fail;
3020         }
3021
3022         /* Create the descriptor buffer dma maps */
3023         txbuf = txr->tx_buffers;
3024         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3025                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3026                 if (error != 0) {
3027                         device_printf(dev, "Unable to create TX DMA map\n");
3028                         goto fail;
3029                 }
3030         }
3031
3032         return 0;
3033 fail:
3034         /* We free all, it handles case where we are in the middle */
3035         igb_free_transmit_structures(adapter);
3036         return (error);
3037 }
3038
3039 /*********************************************************************
3040  *
3041  *  Initialize a transmit ring.
3042  *
3043  **********************************************************************/
3044 static void
3045 igb_setup_transmit_ring(struct tx_ring *txr)
3046 {
3047         struct adapter *adapter = txr->adapter;
3048         struct igb_tx_buffer *txbuf;
3049         int i;
3050
3051         /* Clear the old descriptor contents */
3052         IGB_TX_LOCK(txr);
3053         bzero((void *)txr->tx_base,
3054               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3055         /* Reset indices */
3056         txr->next_avail_desc = 0;
3057         txr->next_to_clean = 0;
3058
3059         /* Free any existing tx buffers. */
3060         txbuf = txr->tx_buffers;
3061         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3062                 if (txbuf->m_head != NULL) {
3063                         bus_dmamap_sync(txr->txtag, txbuf->map,
3064                             BUS_DMASYNC_POSTWRITE);
3065                         bus_dmamap_unload(txr->txtag, txbuf->map);
3066                         m_freem(txbuf->m_head);
3067                         txbuf->m_head = NULL;
3068                 }
3069                 /* clear the watch index */
3070                 txbuf->next_eop = -1;
3071         }
3072
3073         /* Set number of descriptors available */
3074         txr->tx_avail = adapter->num_tx_desc;
3075
3076         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3077             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3078         IGB_TX_UNLOCK(txr);
3079 }
3080
3081 /*********************************************************************
3082  *
3083  *  Initialize all transmit rings.
3084  *
3085  **********************************************************************/
3086 static void
3087 igb_setup_transmit_structures(struct adapter *adapter)
3088 {
3089         struct tx_ring *txr = adapter->tx_rings;
3090
3091         for (int i = 0; i < adapter->num_queues; i++, txr++)
3092                 igb_setup_transmit_ring(txr);
3093
3094         return;
3095 }
3096
3097 /*********************************************************************
3098  *
3099  *  Enable transmit unit.
3100  *
3101  **********************************************************************/
3102 static void
3103 igb_initialize_transmit_units(struct adapter *adapter)
3104 {
3105         struct tx_ring  *txr = adapter->tx_rings;
3106         struct e1000_hw *hw = &adapter->hw;
3107         u32             tctl, txdctl;
3108
3109         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3110         tctl = txdctl = 0;
3111
3112         /* Setup the Tx Descriptor Rings */
3113         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3114                 u64 bus_addr = txr->txdma.dma_paddr;
3115
3116                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3117                     adapter->num_tx_desc * sizeof(union e1000_adv_tx_desc));
3118                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3119                     (uint32_t)(bus_addr >> 32));
3120                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3121                     (uint32_t)bus_addr);
3122
3123                 /* Setup the HW Tx Head and Tail descriptor pointers */
3124                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3125                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3126
3127                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3128                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3129                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3130
3131                 txr->watchdog_check = FALSE;
3132
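                     /*
                     ** TXDCTL packs the prefetch, host and write-back
                     ** thresholds at bit offsets 0, 8 and 16 respectively.
                     */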
3133                 txdctl |= IGB_TX_PTHRESH;
3134                 txdctl |= IGB_TX_HTHRESH << 8;
3135                 txdctl |= IGB_TX_WTHRESH << 16;
3136                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3137                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3138         }
3139
3140         if (adapter->hw.mac.type == e1000_vfadapt)
3141                 return;
3142
3143         /* Program the Transmit Control Register */
3144         tctl = E1000_READ_REG(hw, E1000_TCTL);
3145         tctl &= ~E1000_TCTL_CT;
3146         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3147                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3148
3149         e1000_config_collision_dist(hw);
3150
3151         /* This write will effectively turn on the transmit unit. */
3152         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3153 }
3154
3155 /*********************************************************************
3156  *
3157  *  Free all transmit rings.
3158  *
3159  **********************************************************************/
3160 static void
3161 igb_free_transmit_structures(struct adapter *adapter)
3162 {
3163         struct tx_ring *txr = adapter->tx_rings;
3164
3165         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3166                 IGB_TX_LOCK(txr);
3167                 igb_free_transmit_buffers(txr);
3168                 igb_dma_free(adapter, &txr->txdma);
3169                 IGB_TX_UNLOCK(txr);
3170                 IGB_TX_LOCK_DESTROY(txr);
3171         }
3172         free(adapter->tx_rings, M_DEVBUF);
3173 }
3174
3175 /*********************************************************************
3176  *
3177  *  Free transmit ring related data structures.
3178  *
3179  **********************************************************************/
3180 static void
3181 igb_free_transmit_buffers(struct tx_ring *txr)
3182 {
3183         struct adapter *adapter = txr->adapter;
3184         struct igb_tx_buffer *tx_buffer;
3185         int             i;
3186
3187         INIT_DEBUGOUT("igb_free_transmit_buffers: begin");
3188
3189         if (txr->tx_buffers == NULL)
3190                 return;
3191
3192         tx_buffer = txr->tx_buffers;
3193         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3194                 if (tx_buffer->m_head != NULL) {
3195                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3196                             BUS_DMASYNC_POSTWRITE);
3197                         bus_dmamap_unload(txr->txtag,
3198                             tx_buffer->map);
3199                         m_freem(tx_buffer->m_head);
3200                         tx_buffer->m_head = NULL;
3201                         if (tx_buffer->map != NULL) {
3202                                 bus_dmamap_destroy(txr->txtag,
3203                                     tx_buffer->map);
3204                                 tx_buffer->map = NULL;
3205                         }
3206                 } else if (tx_buffer->map != NULL) {
3207                         bus_dmamap_unload(txr->txtag,
3208                             tx_buffer->map);
3209                         bus_dmamap_destroy(txr->txtag,
3210                             tx_buffer->map);
3211                         tx_buffer->map = NULL;
3212                 }
3213         }
3214 #if __FreeBSD_version >= 800000
3215         if (txr->br != NULL)
3216                 buf_ring_free(txr->br, M_DEVBUF);
3217 #endif
3218         if (txr->tx_buffers != NULL) {
3219                 free(txr->tx_buffers, M_DEVBUF);
3220                 txr->tx_buffers = NULL;
3221         }
3222         if (txr->txtag != NULL) {
3223                 bus_dma_tag_destroy(txr->txtag);
3224                 txr->txtag = NULL;
3225         }
3226         return;
3227 }
3228
3229 /**********************************************************************
3230  *
3231  *  Setup work for hardware segmentation offload (TSO)
3232  *
3233  **********************************************************************/
3234 static boolean_t
3235 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3236 {
3237         struct adapter *adapter = txr->adapter;
3238         struct e1000_adv_tx_context_desc *TXD;
3239         struct igb_tx_buffer        *tx_buffer;
3240         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3241         u32 mss_l4len_idx = 0;
3242         u16 vtag = 0;
3243         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3244         struct ether_vlan_header *eh;
3245         struct ip *ip;
3246         struct tcphdr *th;
3247
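             /*
              * TSO consumes one advanced "context" descriptor: a ring slot
              * carrying header lengths, MSS and command flags rather than
              * a buffer address; the data descriptors that follow refer
              * back to it.
              */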
3248
3249         /*
3250          * Determine where frame payload starts.
3251          * Jump over vlan headers if already present
3252          */
3253         eh = mtod(mp, struct ether_vlan_header *);
3254         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3255                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3256         else
3257                 ehdrlen = ETHER_HDR_LEN;
3258
3259         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3260         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3261                 return FALSE;
3262
3263         /* Only supports IPV4 for now */
3264         ctxd = txr->next_avail_desc;
3265         tx_buffer = &txr->tx_buffers[ctxd];
3266         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3267
3268         ip = (struct ip *)(mp->m_data + ehdrlen);
3269         if (ip->ip_p != IPPROTO_TCP)
3270                 return FALSE;   /* TSO handles TCP only */
3271         ip->ip_sum = 0;
3272         ip_hlen = ip->ip_hl << 2;
3273         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
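             /*
              * Seed the TCP checksum with the pseudo-header sum (addresses
              * and protocol, no length); the hardware completes it for
              * each segment it generates.
              */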
3274         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3275             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3276         tcp_hlen = th->th_off << 2;
3277         /*
3278          * Calculate header length, this is used
3279          * in the transmit desc in igb_xmit
3280          */
3281         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3282
3283         /* VLAN MACLEN IPLEN */
3284         if (mp->m_flags & M_VLANTAG) {
3285                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3286                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3287         }
3288
3289         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3290         vlan_macip_lens |= ip_hlen;
3291         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3292
3293         /* ADV DTYPE TUCMD */
3294         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3295         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3296         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3297         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3298
3299         /* MSS L4LEN IDX */
3300         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3301         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3302         /* 82575 needs the queue index added */
3303         if (adapter->hw.mac.type == e1000_82575)
3304                 mss_l4len_idx |= txr->me << 4;
3305         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3306
3307         TXD->seqnum_seed = htole32(0);
3308         tx_buffer->m_head = NULL;
3309         tx_buffer->next_eop = -1;
3310
3311         if (++ctxd == adapter->num_tx_desc)
3312                 ctxd = 0;
3313
3314         txr->tx_avail--;
3315         txr->next_avail_desc = ctxd;
3316         return TRUE;
3317 }
3318
3319
3320 /*********************************************************************
3321  *
3322  *  Context Descriptor setup for VLAN or CSUM
3323  *
3324  **********************************************************************/
3325
3326 static bool
3327 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3328 {
3329         struct adapter *adapter = txr->adapter;
3330         struct e1000_adv_tx_context_desc *TXD;
3331         struct igb_tx_buffer        *tx_buffer;
3332         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3333         struct ether_vlan_header *eh;
3334         struct ip *ip = NULL;
3335         struct ip6_hdr *ip6;
3336         int  ehdrlen, ctxd, ip_hlen = 0;
3337         u16     etype, vtag = 0;
3338         u8      ipproto = 0;
3339         bool    offload = TRUE;
3340
3341         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3342                 offload = FALSE;
3343
3344         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3345         ctxd = txr->next_avail_desc;
3346         tx_buffer = &txr->tx_buffers[ctxd];
3347         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3348
3349         /*
3350         ** In advanced descriptors the vlan tag must 
3351         ** be placed into the context descriptor, thus
3352         ** we need to be here just for that setup.
3353         */
3354         if (mp->m_flags & M_VLANTAG) {
3355                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3356                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3357         } else if (offload == FALSE)
3358                 return FALSE;
3359
3360         /*
3361          * Determine where frame payload starts.
3362          * Jump over vlan headers if already present,
3363          * helpful for QinQ too.
3364          */
3365         eh = mtod(mp, struct ether_vlan_header *);
3366         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3367                 etype = ntohs(eh->evl_proto);
3368                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3369         } else {
3370                 etype = ntohs(eh->evl_encap_proto);
3371                 ehdrlen = ETHER_HDR_LEN;
3372         }
3373
3374         /* Set the ether header length */
3375         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3376
3377         switch (etype) {
3378                 case ETHERTYPE_IP:
3379                         ip = (struct ip *)(mp->m_data + ehdrlen);
3380                         ip_hlen = ip->ip_hl << 2;
3381                         if (mp->m_len < ehdrlen + ip_hlen) {
3382                                 offload = FALSE;
3383                                 break;
3384                         }
3385                         ipproto = ip->ip_p;
3386                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3387                         break;
3388                 case ETHERTYPE_IPV6:
3389                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3390                         ip_hlen = sizeof(struct ip6_hdr);
3391                         if (mp->m_len < ehdrlen + ip_hlen)
3392                                 return (FALSE);
3393                         ipproto = ip6->ip6_nxt;
3394                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3395                         break;
3396                 default:
3397                         offload = FALSE;
3398                         break;
3399         }
3400
3401         vlan_macip_lens |= ip_hlen;
3402         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3403
3404         switch (ipproto) {
3405                 case IPPROTO_TCP:
3406                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3407                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3408                         break;
3409                 case IPPROTO_UDP:
3410                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3411                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3412                         break;
3413 #if __FreeBSD_version >= 800000
3414                 case IPPROTO_SCTP:
3415                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3416                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3417                         break;
3418 #endif
3419                 default:
3420                         offload = FALSE;
3421                         break;
3422         }
3423
3424         /* 82575 needs the queue index added */
3425         if (adapter->hw.mac.type == e1000_82575)
3426                 mss_l4len_idx = txr->me << 4;
3427
3428         /* Now copy bits into descriptor */
3429         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3430         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3431         TXD->seqnum_seed = htole32(0);
3432         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3433
3434         tx_buffer->m_head = NULL;
3435         tx_buffer->next_eop = -1;
3436
3437         /* We've consumed the first desc, adjust counters */
3438         if (++ctxd == adapter->num_tx_desc)
3439                 ctxd = 0;
3440         txr->next_avail_desc = ctxd;
3441         --txr->tx_avail;
3442
3443         return (offload);
3444 }
3445
3446
3447 /**********************************************************************
3448  *
3449  *  Examine each tx_buffer in the used queue. If the hardware is done
3450  *  processing the packet then free associated resources. The
3451  *  tx_buffer is put back on the free queue.
3452  *
3453  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3454  **********************************************************************/
3455 static bool
3456 igb_txeof(struct tx_ring *txr)
3457 {
3458         struct adapter  *adapter = txr->adapter;
3459         int first, last, done;
3460         struct igb_tx_buffer *tx_buffer;
3461         struct e1000_tx_desc   *tx_desc, *eop_desc;
3462         struct ifnet   *ifp = adapter->ifp;
3463
3464         IGB_TX_LOCK_ASSERT(txr);
3465
3466         if (txr->tx_avail == adapter->num_tx_desc)
3467                 return FALSE;
3468
3469         first = txr->next_to_clean;
3470         tx_desc = &txr->tx_base[first];
3471         tx_buffer = &txr->tx_buffers[first];
3472         last = tx_buffer->next_eop;
3473         eop_desc = &txr->tx_base[last];
3474
3475         /*
3476          * What this does is get the index of the
3477          * first descriptor AFTER the EOP of the 
3478          * first packet, that way we can do the
3479          * simple comparison on the inner while loop.
3480          */
3481         if (++last == adapter->num_tx_desc)
3482                 last = 0;
3483         done = last;
3484
3485         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3486             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3487
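             /*
              * The hardware sets the DD (Descriptor Done) status bit in
              * the EOP descriptor's writeback when it has finished the
              * whole packet, so testing the EOP slot covers every
              * descriptor of that packet.
              */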
3488         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3489                 /* We clean the range of the packet */
3490                 while (first != done) {
3491                         tx_desc->upper.data = 0;
3492                         tx_desc->lower.data = 0;
3493                         tx_desc->buffer_addr = 0;
3494                         ++txr->tx_avail;
3495
3496                         if (tx_buffer->m_head) {
3497                                 txr->bytes +=
3498                                     tx_buffer->m_head->m_pkthdr.len;
3499                                 bus_dmamap_sync(txr->txtag,
3500                                     tx_buffer->map,
3501                                     BUS_DMASYNC_POSTWRITE);
3502                                 bus_dmamap_unload(txr->txtag,
3503                                     tx_buffer->map);
3504
3505                                 m_freem(tx_buffer->m_head);
3506                                 tx_buffer->m_head = NULL;
3507                         }
3508                         tx_buffer->next_eop = -1;
3509                         txr->watchdog_time = ticks;
3510
3511                         if (++first == adapter->num_tx_desc)
3512                                 first = 0;
3513
3514                         tx_buffer = &txr->tx_buffers[first];
3515                         tx_desc = &txr->tx_base[first];
3516                 }
3517                 ++txr->packets;
3518                 ++ifp->if_opackets;
3519                 /* See if we can continue to the next packet */
3520                 last = tx_buffer->next_eop;
3521                 if (last != -1) {
3522                         eop_desc = &txr->tx_base[last];
3523                         /* Get new done point */
3524                         if (++last == adapter->num_tx_desc)
                                             last = 0;
3525                         done = last;
3526                 } else
3527                         break;
3528         }
3529         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3530             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3531
3532         txr->next_to_clean = first;
3533
3534         /*
3535          * If we have enough room, clear IFF_DRV_OACTIVE
3536          * to tell the stack that it is OK to send packets.
3537          */
3538         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {                
3539                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3540                 /* All clean, turn off the watchdog */
3541                 if (txr->tx_avail == adapter->num_tx_desc) {
3542                         txr->watchdog_check = FALSE;
3543                         return (FALSE);
3544                 }
3545         }
3546
3547         return (TRUE);
3548 }
3549
3550
3551 /*********************************************************************
3552  *
3553  *  Refresh mbuf buffers for RX descriptor rings
3554  *   - the ring keeps its own state, so discards due to resource
3555  *     exhaustion are unnecessary: if an mbuf cannot be obtained
3556  *     the routine just returns, keeping its placeholder, and can
3557  *     simply be called again later to retry.
3558  *
3559  **********************************************************************/
3560 static void
3561 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3562 {
3563         struct adapter          *adapter = rxr->adapter;
3564         bus_dma_segment_t       hseg[1];
3565         bus_dma_segment_t       pseg[1];
3566         struct igb_rx_buf       *rxbuf;
3567         struct mbuf             *mh, *mp;
3568         int                     i, nsegs, error, cleaned;
3569
3570         i = rxr->next_to_refresh;
3571         cleaned = -1; /* Signify no completions */
3572         while (i != limit) {
3573                 rxbuf = &rxr->rx_buffers[i];
3574                 if ((rxbuf->m_head == NULL) && (rxr->hdr_split)) {
3575                         mh = m_gethdr(M_DONTWAIT, MT_DATA);
3576                         if (mh == NULL)
3577                                 goto update;
3578                         mh->m_pkthdr.len = mh->m_len = MHLEN;
3580                         mh->m_flags |= M_PKTHDR;
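                            /* 2-byte shim so the IP header lands 32-bit
                               aligned after the 14-byte Ethernet header */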
3581                         m_adj(mh, ETHER_ALIGN);
3582                         /* Get the memory mapping */
3583                         error = bus_dmamap_load_mbuf_sg(rxr->htag,
3584                             rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3585                         if (error != 0) {
3586                                 device_printf(adapter->dev,
3587                                     "GET BUF: dmamap load failure - %d\n", error);
3588                                 m_free(mh);
3589                                 goto update;
3590                         }
3591                         rxbuf->m_head = mh;
3592                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3593                             BUS_DMASYNC_PREREAD);
3594                         rxr->rx_base[i].read.hdr_addr =
3595                             htole64(hseg[0].ds_addr);
3596                 }
3597
3598                 if (rxbuf->m_pack == NULL) {
3599                         mp = m_getjcl(M_DONTWAIT, MT_DATA,
3600                             M_PKTHDR, adapter->rx_mbuf_sz);
3601                         if (mp == NULL)
3602                                 goto update;
3603                         mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3604                         /* Get the memory mapping */
3605                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3606                             rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3607                         if (error != 0) {
3608                                 device_printf(adapter->dev,
3609                                     "GET BUF: dmamap load failure - %d\n", error);
3610                                 m_free(mp);
3611                                 goto update;
3612                         }
3613                         rxbuf->m_pack = mp;
3614                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3615                             BUS_DMASYNC_PREREAD);
3616                         rxr->rx_base[i].read.pkt_addr =
3617                             htole64(pseg[0].ds_addr);
3618                 }
3619
3620                 cleaned = i;
3621                 /* Calculate next index */
3622                 if (++i == adapter->num_rx_desc)
3623                         i = 0;
3624                 /* This is the work marker for refresh */
3625                 rxr->next_to_refresh = i;
3626         }
3627 update:
3628         if (cleaned != -1) /* If we refreshed some, bump tail */
3629                 E1000_WRITE_REG(&adapter->hw,
3630                     E1000_RDT(rxr->me), cleaned);
3631         return;
3632 }
3633
3634
3635 /*********************************************************************
3636  *
3637  *  Allocate memory for rx_buffer structures. Since we use one
3638  *  rx_buffer per received packet, the maximum number of rx_buffer's
3639  *  that we'll need is equal to the number of receive descriptors
3640  *  that we've allocated.
3641  *
3642  **********************************************************************/
3643 static int
3644 igb_allocate_receive_buffers(struct rx_ring *rxr)
3645 {
3646         struct  adapter         *adapter = rxr->adapter;
3647         device_t                dev = adapter->dev;
3648         struct igb_rx_buf       *rxbuf;
3649         int                     i, bsize, error;
3650
3651         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3652         if (!(rxr->rx_buffers =
3653             (struct igb_rx_buf *) malloc(bsize,
3654             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3655                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3656                 error = ENOMEM;
3657                 goto fail;
3658         }
3659
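             /*
             ** Header split needs two mappings per descriptor: a small
             ** header buffer (a single mbuf) and a payload cluster, so
             ** each gets its own DMA tag and per-buffer map below.
             */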
3660         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3661                                    1, 0,                /* alignment, bounds */
3662                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3663                                    BUS_SPACE_MAXADDR,   /* highaddr */
3664                                    NULL, NULL,          /* filter, filterarg */
3665                                    MSIZE,               /* maxsize */
3666                                    1,                   /* nsegments */
3667                                    MSIZE,               /* maxsegsize */
3668                                    0,                   /* flags */
3669                                    NULL,                /* lockfunc */
3670                                    NULL,                /* lockfuncarg */
3671                                    &rxr->htag))) {
3672                 device_printf(dev, "Unable to create RX DMA tag\n");
3673                 goto fail;
3674         }
3675
3676         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3677                                    1, 0,                /* alignment, bounds */
3678                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3679                                    BUS_SPACE_MAXADDR,   /* highaddr */
3680                                    NULL, NULL,          /* filter, filterarg */
3681                                    MJUMPAGESIZE,        /* maxsize */
3682                                    1,                   /* nsegments */
3683                                    MJUMPAGESIZE,        /* maxsegsize */
3684                                    0,                   /* flags */
3685                                    NULL,                /* lockfunc */
3686                                    NULL,                /* lockfuncarg */
3687                                    &rxr->ptag))) {
3688                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3689                 goto fail;
3690         }
3691
3692         for (i = 0; i < adapter->num_rx_desc; i++) {
3693                 rxbuf = &rxr->rx_buffers[i];
3694                 error = bus_dmamap_create(rxr->htag,
3695                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3696                 if (error) {
3697                         device_printf(dev,
3698                             "Unable to create RX head DMA maps\n");
3699                         goto fail;
3700                 }
3701                 error = bus_dmamap_create(rxr->ptag,
3702                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3703                 if (error) {
3704                         device_printf(dev,
3705                             "Unable to create RX packet DMA maps\n");
3706                         goto fail;
3707                 }
3708         }
3709
3710         return (0);
3711
3712 fail:
3713         /* Frees all, but can handle partial completion */
3714         igb_free_receive_structures(adapter);
3715         return (error);
3716 }
3717
3718
3719 static void
3720 igb_free_receive_ring(struct rx_ring *rxr)
3721 {
3722         struct  adapter         *adapter;
3723         struct igb_rx_buf       *rxbuf;
3724         int i;
3725
3726         adapter = rxr->adapter;
3727         for (i = 0; i < adapter->num_rx_desc; i++) {
3728                 rxbuf = &rxr->rx_buffers[i];
3729                 if (rxbuf->m_head != NULL) {
3730                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3731                             BUS_DMASYNC_POSTREAD);
3732                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3733                         rxbuf->m_head->m_flags |= M_PKTHDR;
3734                         m_freem(rxbuf->m_head);
3735                 }
3736                 if (rxbuf->m_pack != NULL) {
3737                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3738                             BUS_DMASYNC_POSTREAD);
3739                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3740                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3741                         m_freem(rxbuf->m_pack);
3742                 }
3743                 rxbuf->m_head = NULL;
3744                 rxbuf->m_pack = NULL;
3745         }
3746 }
3747
3748
3749 /*********************************************************************
3750  *
3751  *  Initialize a receive ring and its buffers.
3752  *
3753  **********************************************************************/
3754 static int
3755 igb_setup_receive_ring(struct rx_ring *rxr)
3756 {
3757         struct  adapter         *adapter;
3758         struct  ifnet           *ifp;
3759         device_t                dev;
3760         struct igb_rx_buf       *rxbuf;
3761         bus_dma_segment_t       pseg[1], hseg[1];
3762         struct lro_ctrl         *lro = &rxr->lro;
3763         int                     rsize, nsegs, error = 0;
3764
3765         adapter = rxr->adapter;
3766         dev = adapter->dev;
3767         ifp = adapter->ifp;
3768
3769         /* Clear the ring contents */
3770         IGB_RX_LOCK(rxr);
3771         rsize = roundup2(adapter->num_rx_desc *
3772             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3773         bzero((void *)rxr->rx_base, rsize);
3774
3775         /*
3776         ** Free current RX buffer structures and their mbufs
3777         */
3778         igb_free_receive_ring(rxr);
3779
3780         /* Configure for header split? */
3781         if (igb_header_split)
3782                 rxr->hdr_split = TRUE;
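             /*
             ** When splitting, the NIC DMAs protocol headers into the
             ** small head buffer and the payload into the cluster, so
             ** payload pages reach the stack untouched.
             */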
3783
3784         /* Now replenish the ring mbufs */
3785         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3786                 struct mbuf     *mh, *mp;
3787
3788                 rxbuf = &rxr->rx_buffers[j];
3789                 if (rxr->hdr_split == FALSE)
3790                         goto skip_head;
3791
3792                 /* First the header */
3793                 rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3794                 if (rxbuf->m_head == NULL) {
3795                         error = ENOBUFS;
3796                         goto fail;
3797                 }
3798                 m_adj(rxbuf->m_head, ETHER_ALIGN);
3799                 mh = rxbuf->m_head;
3800                 mh->m_len = mh->m_pkthdr.len = MHLEN;
3801                 mh->m_flags |= M_PKTHDR;
3802                 /* Get the memory mapping */
3803                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
3804                     rxbuf->hmap, rxbuf->m_head, hseg,
3805                     &nsegs, BUS_DMA_NOWAIT);
3806                 if (error != 0) /* Nothing elegant to do here */
3807                         goto fail;
3808                 bus_dmamap_sync(rxr->htag,
3809                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
3810                 /* Update descriptor */
3811                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3812
3813 skip_head:
3814                 /* Now the payload cluster */
3815                 rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3816                     M_PKTHDR, adapter->rx_mbuf_sz);
3817                 if (rxbuf->m_pack == NULL) {
3818                         error = ENOBUFS;
3819                         goto fail;
3820                 }
3821                 mp = rxbuf->m_pack;
3822                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3823                 /* Get the memory mapping */
3824                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3825                     rxbuf->pmap, mp, pseg,
3826                     &nsegs, BUS_DMA_NOWAIT);
3827                 if (error != 0)
3828                         goto fail;
3829                 bus_dmamap_sync(rxr->ptag,
3830                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
3831                 /* Update descriptor */
3832                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3833         }
3834
3835         /* Setup our descriptor indices */
3836         rxr->next_to_check = 0;
3837         rxr->next_to_refresh = 0;
3838         rxr->lro_enabled = FALSE;
3839         rxr->rx_split_packets = 0;
3840         rxr->rx_bytes = 0;
3841
3842         rxr->fmp = NULL;
3843         rxr->lmp = NULL;
3844         rxr->discard = FALSE;
3845
3846         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3847             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3848
3849         /*
3850         ** Now set up the LRO interface.  Header split is generally
3851         ** only worthwhile when LRO is enabled, since on its own it
3852         ** is often undesirable in such setups.
3853         */
3855         if (ifp->if_capenable & IFCAP_LRO) {
3856                 int err = tcp_lro_init(lro);
3857                 if (err) {
3858                         device_printf(dev, "LRO Initialization failed!\n");
3859                         goto fail;
3860                 }
3861                 INIT_DEBUGOUT("RX LRO Initialized\n");
3862                 rxr->lro_enabled = TRUE;
3863                 lro->ifp = adapter->ifp;
3864         }
3865
3866         IGB_RX_UNLOCK(rxr);
3867         return (0);
3868
3869 fail:
3870         igb_free_receive_ring(rxr);
3871         IGB_RX_UNLOCK(rxr);
3872         return (error);
3873 }
3874
3875 /*********************************************************************
3876  *
3877  *  Initialize all receive rings.
3878  *
3879  **********************************************************************/
3880 static int
3881 igb_setup_receive_structures(struct adapter *adapter)
3882 {
3883         struct rx_ring *rxr = adapter->rx_rings;
3884         int i;
3885
3886         for (i = 0; i < adapter->num_queues; i++, rxr++)
3887                 if (igb_setup_receive_ring(rxr))
3888                         goto fail;
3889
3890         return (0);
3891 fail:
3892         /*
3893          * Free the RX buffers allocated so far; only the rings that
3894          * completed setup need handling here, since the failing ring
3895          * has already cleaned up after itself.  'i' is the endpoint.
3896          */
3897         for (int j = 0; j < i; ++j) {
3898                 rxr = &adapter->rx_rings[j];
3899                 igb_free_receive_ring(rxr);
3900         }
3901
3902         return (ENOBUFS);
3903 }
3904
3905 /*********************************************************************
3906  *
3907  *  Enable receive unit.
3908  *
3909  **********************************************************************/
3910 static void
3911 igb_initialize_receive_units(struct adapter *adapter)
3912 {
3913         struct rx_ring  *rxr = adapter->rx_rings;
3914         struct ifnet    *ifp = adapter->ifp;
3915         struct e1000_hw *hw = &adapter->hw;
3916         u32             rctl, rxcsum, psize, srrctl = 0;
3917
3918         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3919
3920         /*
3921          * Make sure receives are disabled while setting
3922          * up the descriptor ring
3923          */
3924         rctl = E1000_READ_REG(hw, E1000_RCTL);
3925         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3926
3927         /*
3928         ** Set up for header split
3929         */
3930         if (rxr->hdr_split) {
3931                 /* Use a standard mbuf for the header */
3932                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3933                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3934         } else
3935                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3936
3937         /*
3938         ** Set up for jumbo frames
3939         */
3940         if (ifp->if_mtu > ETHERMTU) {
3941                 rctl |= E1000_RCTL_LPE;
3942                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3943                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3944
3945                 /* Set maximum packet len */
3946                 psize = adapter->max_frame_size;
3947                 /* are we on a vlan? */
3948                 if (adapter->ifp->if_vlantrunk != NULL)
3949                         psize += VLAN_TAG_SIZE;
3950                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3951         } else {
3952                 rctl &= ~E1000_RCTL_LPE;
3953                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3954                 rctl |= E1000_RCTL_SZ_2048;
3955         }
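             /*
             ** Note: SRRCTL's BSIZEPKT field is expressed in 1KB units,
             ** hence the buffer sizes shifted right by
             ** E1000_SRRCTL_BSIZEPKT_SHIFT above.
             */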
3956
3957         /* Setup the Base and Length of the Rx Descriptor Rings */
3958         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3959                 u64 bus_addr = rxr->rxdma.dma_paddr;
3960                 u32 rxdctl;
3961
3962                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3963                     adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc));
3964                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3965                     (uint32_t)(bus_addr >> 32));
3966                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3967                     (uint32_t)bus_addr);
3968                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3969                 /* Enable this Queue */
3970                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3971                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3972                 rxdctl &= 0xFFF00000;
3973                 rxdctl |= IGB_RX_PTHRESH;
3974                 rxdctl |= IGB_RX_HTHRESH << 8;
3975                 rxdctl |= IGB_RX_WTHRESH << 16;
3976                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3977         }
3978
3979         /*
3980         ** Setup for RX MultiQueue
3981         */
3982         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3983         if (adapter->num_queues > 1) {
3984                 u32 random[10], mrqc, shift = 0;
3985                 union igb_reta {
3986                         u32 dword;
3987                         u8  bytes[4];
3988                 } reta;
3989
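                     /*
                     ** The hardware hashes each incoming flow, indexes the
                     ** 128-entry redirection table (RETA) with the low hash
                     ** bits, and the entry selects the destination queue.
                     ** E.g. with two queues (and shift 0) the entries
                     ** repeat 0,1,0,1,... so, on a little-endian host,
                     ** each dword written below is 0x01000100.
                     */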
3990                 arc4rand(&random, sizeof(random), 0);
3991                 if (adapter->hw.mac.type == e1000_82575)
3992                         shift = 6;
3993                 /* Fill the 128-entry RETA, four one-byte entries per write */
3994                 for (int i = 0; i < 128; i++) {
3995                         reta.bytes[i & 3] =
3996                             (i % adapter->num_queues) << shift;
3997                         if ((i & 3) == 3)
3998                                 E1000_WRITE_REG(hw,
3999                                     E1000_RETA(i >> 2), reta.dword);
4000                 }
4001                 /* Enable 4-queue RSS and load the random hash key */
4002                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4003                 for (int i = 0; i < 10; i++)
4004                         E1000_WRITE_REG_ARRAY(hw,
4005                             E1000_RSSRK(0), i, random[i]);
4006
4007                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4008                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4009                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4010                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4011                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4012                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4013                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4014                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4015
4016                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4017
4018                 /*
4019                 ** NOTE: Receive Full-Packet Checksum Offload 
4020                 ** is mutually exclusive with Multiqueue. However
4021                 ** this is not the same as TCP/IP checksums which
4022                 ** still work.
4023                 */
4024                 rxcsum |= E1000_RXCSUM_PCSD;
4025 #if __FreeBSD_version >= 800000
4026                 /* For SCTP Offload */
4027                 if ((hw->mac.type == e1000_82576)
4028                     && (ifp->if_capenable & IFCAP_RXCSUM))
4029                         rxcsum |= E1000_RXCSUM_CRCOFL;
4030 #endif
4031         } else {
4032                 /* Non RSS setup */
4033                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4034                         rxcsum |= E1000_RXCSUM_IPPCSE;
4035 #if __FreeBSD_version >= 800000
4036                         if (adapter->hw.mac.type == e1000_82576)
4037                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4038 #endif
4039                 } else
4040                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4041         }
4042         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4043
4044         /* Setup the Receive Control Register */
4045         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4046         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4047                    E1000_RCTL_RDMTS_HALF |
4048                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4049         /* Strip CRC bytes. */
4050         rctl |= E1000_RCTL_SECRC;
4051         /* Make sure VLAN Filters are off */
4052         rctl &= ~E1000_RCTL_VFE;
4053         /* Don't store bad packets */
4054         rctl &= ~E1000_RCTL_SBP;
4055
4056         /* Enable Receives */
4057         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4058
4059         /*
4060          * Setup the HW Rx Head and Tail Descriptor Pointers
4061          *   - needs to be after enable
4062          */
4063         for (int i = 0; i < adapter->num_queues; i++) {
4064                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4065                 E1000_WRITE_REG(hw, E1000_RDT(i),
4066                      adapter->num_rx_desc - 1);
4067         }
4068         return;
4069 }
4070
4071 /*********************************************************************
4072  *
4073  *  Free receive rings.
4074  *
4075  **********************************************************************/
4076 static void
4077 igb_free_receive_structures(struct adapter *adapter)
4078 {
4079         struct rx_ring *rxr = adapter->rx_rings;
4080
4081         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4082                 struct lro_ctrl *lro = &rxr->lro;
4083                 igb_free_receive_buffers(rxr);
4084                 tcp_lro_free(lro);
4085                 igb_dma_free(adapter, &rxr->rxdma);
4086         }
4087
4088         free(adapter->rx_rings, M_DEVBUF);
4089 }
4090
4091 /*********************************************************************
4092  *
4093  *  Free receive ring data structures.
4094  *
4095  **********************************************************************/
4096 static void
4097 igb_free_receive_buffers(struct rx_ring *rxr)
4098 {
4099         struct adapter          *adapter = rxr->adapter;
4100         struct igb_rx_buf       *rxbuf;
4101         int i;
4102
4103         INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4104
4105         /* Cleanup any existing buffers */
4106         if (rxr->rx_buffers != NULL) {
4107                 for (i = 0; i < adapter->num_rx_desc; i++) {
4108                         rxbuf = &rxr->rx_buffers[i];
4109                         if (rxbuf->m_head != NULL) {
4110                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4111                                     BUS_DMASYNC_POSTREAD);
4112                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4113                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4114                                 m_freem(rxbuf->m_head);
4115                         }
4116                         if (rxbuf->m_pack != NULL) {
4117                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4118                                     BUS_DMASYNC_POSTREAD);
4119                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4120                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4121                                 m_freem(rxbuf->m_pack);
4122                         }
4123                         rxbuf->m_head = NULL;
4124                         rxbuf->m_pack = NULL;
4125                         if (rxbuf->hmap != NULL) {
4126                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4127                                 rxbuf->hmap = NULL;
4128                         }
4129                         if (rxbuf->pmap != NULL) {
4130                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4131                                 rxbuf->pmap = NULL;
4132                         }
4133                 }
4134                 if (rxr->rx_buffers != NULL) {
4135                         free(rxr->rx_buffers, M_DEVBUF);
4136                         rxr->rx_buffers = NULL;
4137                 }
4138         }
4139
4140         if (rxr->htag != NULL) {
4141                 bus_dma_tag_destroy(rxr->htag);
4142                 rxr->htag = NULL;
4143         }
4144         if (rxr->ptag != NULL) {
4145                 bus_dma_tag_destroy(rxr->ptag);
4146                 rxr->ptag = NULL;
4147         }
4148 }
4149
4150 static __inline void
4151 igb_rx_discard(struct rx_ring *rxr, int i)
4152 {
4153         struct adapter          *adapter = rxr->adapter;
4154         struct igb_rx_buf       *rbuf;
4155         struct mbuf             *mh, *mp;
4156
4157         rbuf = &rxr->rx_buffers[i];
4158         if (rxr->fmp != NULL) {
4159                 rxr->fmp->m_flags |= M_PKTHDR;
4160                 m_freem(rxr->fmp);
4161                 rxr->fmp = NULL;
4162                 rxr->lmp = NULL;
4163         }
4164
4165         mh = rbuf->m_head;
4166         mp = rbuf->m_pack;
4167
4168         /* Reuse the loaded DMA maps and just reset the mbuf chains */
             if (mh != NULL) {      /* header mbuf exists only with split */
4169                 mh->m_len = MHLEN;
4170                 mh->m_flags |= M_PKTHDR;
4171                 mh->m_next = NULL;
             }
4172
4173         mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4174         mp->m_data = mp->m_ext.ext_buf;
4175         mp->m_next = NULL;
4176         return;
4177 }
4178
4179 static __inline void
4180 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4181 {
4182
4183         /*
4184          * At the moment LRO is only done for IPv4/TCP packets whose
4185          * TCP checksum has been verified by hardware, and which carry
4186          * no VLAN tag in the Ethernet header.
4187          */
4188         if (rxr->lro_enabled &&
4189             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4190             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4191             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4192             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4193             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4194             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4195                 /*
4196                  * Send to the stack if:
4197                  *  - LRO is not enabled, or
4198                  *  - there are no LRO resources, or
4199                  *  - the LRO enqueue fails.
4200                  */
4201                 if (rxr->lro.lro_cnt != 0)
4202                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4203                                 return;
4204         }
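        /*
        ** Drop the RX lock around if_input(); the stack may hold
        ** the frame for some time and can re-enter the driver, so
        ** keeping the ring lock across the call would invite
        ** deadlock and needless contention.
        */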
4205         IGB_RX_UNLOCK(rxr);
4206         (*ifp->if_input)(ifp, m);
4207         IGB_RX_LOCK(rxr);
4208 }
4209
4210 /*********************************************************************
4211  *
4212  *  This routine executes in interrupt context. It replenishes
4213  *  the mbufs in the descriptor ring and passes data which has
4214  *  been DMA'd into host memory up to the upper layer.
4215  *
4216  *  We loop at most count times if count is > 0, or until done if
4217  *  count < 0.
4218  *
4219  *  Return TRUE if more to clean, FALSE otherwise
4220  *********************************************************************/
4221 static bool
4222 igb_rxeof(struct igb_queue *que, int count, int *done)
4223 {
4224         struct adapter          *adapter = que->adapter;
4225         struct rx_ring          *rxr = que->rxr;
4226         struct ifnet            *ifp = adapter->ifp;
4227         struct lro_ctrl         *lro = &rxr->lro;
4228         struct lro_entry        *queued;
4229         int                     i, processed = 0, rxdone = 0;
4230         u32                     ptype, staterr = 0;
4231         union e1000_adv_rx_desc *cur;
4232
4233         IGB_RX_LOCK(rxr);
4234         /* Sync the ring. */
4235         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4236             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4237
4238         /* Main clean loop */
4239         for (i = rxr->next_to_check; count != 0;) {
4240                 struct mbuf             *sendmp, *mh, *mp;
4241                 struct igb_rx_buf       *rxbuf;
4242                 u16                     hlen, plen, hdr, vtag;
4243                 bool                    eop = FALSE;
4244  
4245                 cur = &rxr->rx_base[i];
4246                 staterr = le32toh(cur->wb.upper.status_error);
4247                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4248                         break;
4249                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4250                         break;
4251                 count--;
4252                 sendmp = mh = mp = NULL;
4253                 cur->wb.upper.status_error = 0;
4254                 rxbuf = &rxr->rx_buffers[i];
4255                 plen = le16toh(cur->wb.upper.length);
4256                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4257                 vtag = le16toh(cur->wb.upper.vlan);
4258                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4259                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4260
4261                 /* Make sure all segments of a bad packet are discarded */
4262                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4263                     (rxr->discard)) {
4264                         ifp->if_ierrors++;
4265                         ++rxr->rx_discarded;
4266                         if (!eop) /* Catch subsequent segs */
4267                                 rxr->discard = TRUE;
4268                         else
4269                                 rxr->discard = FALSE;
4270                         igb_rx_discard(rxr, i);
4271                         goto next_desc;
4272                 }
4273
4274                 /*
4275                 ** The way the hardware is configured to
4276                 ** split, it will ONLY use the header buffer
4277                 ** when header split is enabled, otherwise we
4278                 ** get normal behavior, i.e., both header and
4279                 ** payload are DMA'd into the payload buffer.
4280                 **
4281                 ** The fmp test is to catch the case where a
4282                 ** packet spans multiple descriptors; in that
4283                 ** case only the first header is valid.
4284                 */
4285                 if (rxr->hdr_split && rxr->fmp == NULL) {
4286                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4287                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4288                         if (hlen > IGB_HDR_BUF)
4289                                 hlen = IGB_HDR_BUF;
4290                         /* Handle the header mbuf */
4291                         mh = rxr->rx_buffers[i].m_head;
4292                         mh->m_len = hlen;
4293                         /* clear buf info for refresh */
4294                         rxbuf->m_head = NULL;
4295                         /*
4296                         ** Get the payload length; this
4297                         ** could be zero if it's a small
4298                         ** packet.
4299                         */
4300                         if (plen > 0) {
4301                                 mp = rxr->rx_buffers[i].m_pack;
4302                                 mp->m_len = plen;
4303                                 mh->m_next = mp;
4304                                 /* clear buf info for refresh */
4305                                 rxbuf->m_pack = NULL;
4306                                 rxr->rx_split_packets++;
4307                         }
4308                 } else {
4309                         /*
4310                         ** Either no header split, or a
4311                         ** secondary piece of a fragmented
4312                         ** split packet.
4313                         */
4314                         mh = rxr->rx_buffers[i].m_pack;
4315                         mh->m_len = plen;
4316                         /* clear buf info for refresh */
4317                         rxbuf->m_pack = NULL;
4318                 }
4319
4320                 ++processed; /* So we know when to refresh */
4321
4322                 /* Initial frame - setup */
4323                 if (rxr->fmp == NULL) {
4324                         mh->m_pkthdr.len = mh->m_len;
4325                         /* Store the first mbuf */
4326                         rxr->fmp = mh;
4327                         rxr->lmp = mh;
4328                         if (mp != NULL) {
4329                                 /* Add payload if split */
4330                                 mh->m_pkthdr.len += mp->m_len;
4331                                 rxr->lmp = mh->m_next;
4332                         }
4333                 } else {
4334                         /* Chain mbuf's together */
4335                         rxr->lmp->m_next = mh;
4336                         rxr->lmp = rxr->lmp->m_next;
4337                         rxr->fmp->m_pkthdr.len += mh->m_len;
4338                 }
4339
4340                 if (eop) {
4341                         rxr->fmp->m_pkthdr.rcvif = ifp;
4342                         ifp->if_ipackets++;
4343                         rxr->rx_packets++;
4344                         /* capture data for AIM */
4345                         rxr->packets++;
4346                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4347                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4348
4349                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4350                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4351
4352                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4353                             (staterr & E1000_RXD_STAT_VP) != 0) {
4354                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4355                                 rxr->fmp->m_flags |= M_VLANTAG;
4356                         }
4357 #if __FreeBSD_version >= 800000
4358                         rxr->fmp->m_pkthdr.flowid = que->msix;
4359                         rxr->fmp->m_flags |= M_FLOWID;
4360 #endif
4361                         sendmp = rxr->fmp;
4362                         /* Make sure to set M_PKTHDR. */
4363                         sendmp->m_flags |= M_PKTHDR;
4364                         rxr->fmp = NULL;
4365                         rxr->lmp = NULL;
4366                 }
4367
4368 next_desc:
4369                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4370                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4371
4372                 /* Advance our pointers to the next descriptor. */
4373                 if (++i == adapter->num_rx_desc)
4374                         i = 0;
4375                 /*
4376                 ** Send to the stack or LRO
4377                 */
4378                 if (sendmp != NULL) {
4379                         rxr->next_to_check = i;
4380                         igb_rx_input(rxr, ifp, sendmp, ptype);
4381                         i = rxr->next_to_check;
4382                         rxdone++;
4383                 }
4384
4385                 /* Refresh the mbufs every 8 descriptors */
4386                 if (processed == 8) {
4387                         igb_refresh_mbufs(rxr, i);
4388                         processed = 0;
4389                 }
4390         }
4391
4392         /* Catch any remainders */
4393         if (processed != 0) {
4394                 igb_refresh_mbufs(rxr, i);
4395                 processed = 0;
4396         }
4397
4398         rxr->next_to_check = i;
4399
4400         /*
4401          * Flush any outstanding LRO work
4402          */
4403         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4404                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4405                 tcp_lro_flush(lro, queued);
4406         }
4407
4408         IGB_RX_UNLOCK(rxr);
4409
4410         if (done != NULL)
4411                 *done = rxdone;
4412
4413         /*
4414         ** If the last descriptor checked still had DD set,
4415         ** there is more to clean; have the caller reschedule.
4416         */
4417         if ((staterr & E1000_RXD_STAT_DD) != 0)
4418                 return (TRUE);
4419
4420         return (FALSE);
4421 }
4422
4423 /*********************************************************************
4424  *
4425  *  Verify that the hardware indicated that the checksum is valid.
4426  *  Inform the stack about the status of checksum so that stack
4427  *  doesn't spend time verifying the checksum.
4428  *
4429  *********************************************************************/
4430 static void
4431 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4432 {
4433         u16 status = (u16)staterr;
4434         u8  errors = (u8) (staterr >> 24);
4435         int sctp;
4436
4437         /* The Ignore Checksum Indication bit is set */
4438         if (status & E1000_RXD_STAT_IXSM) {
4439                 mp->m_pkthdr.csum_flags = 0;
4440                 return;
4441         }
4442
4443         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4444             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4445                 sctp = 1;
4446         else
4447                 sctp = 0;
4448         if (status & E1000_RXD_STAT_IPCS) {
4449                 /* Did it pass? */
4450                 if (!(errors & E1000_RXD_ERR_IPE)) {
4451                         /* IP Checksum Good */
4452                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4453                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4454                 } else
4455                         mp->m_pkthdr.csum_flags = 0;
4456         }
4457
4458         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4459                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4460 #if __FreeBSD_version >= 800000
4461                 if (sctp) /* reassign */
4462                         type = CSUM_SCTP_VALID;
4463 #endif
4464                 /* Did it pass? */
4465                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4466                         mp->m_pkthdr.csum_flags |= type;
4467                         if (sctp == 0)
4468                                 mp->m_pkthdr.csum_data = htons(0xffff);
4469                 }
4470         }
4471         return;
4472 }
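/*
** To make the flag plumbing above concrete: for a good TCP/IPv4
** frame the hardware sets E1000_RXD_STAT_IPCS and
** E1000_RXD_STAT_TCPCS in the status byte and leaves IPE/TCPE
** clear in the error byte, so the mbuf leaves here with
**
**      mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
**          CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
**      mp->m_pkthdr.csum_data  = 0xffff;
**
** which tells the stack it may skip both checksum verifications.
*/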
4473
4474 /*
4475  * This routine is run via a vlan
4476  * config EVENT
4477  */
4478 static void
4479 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4480 {
4481         struct adapter  *adapter = ifp->if_softc;
4482         u32             index, bit;
4483
4484         if (ifp->if_softc != arg)       /* Not our event */
4485                 return;
4486
4487         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4488                 return;
4489
4490         index = (vtag >> 5) & 0x7F;
4491         bit = vtag & 0x1F;
4492         igb_shadow_vfta[index] |= (1 << bit);
4493         ++adapter->num_vlans;
4494         /* Re-init to load the changes */
4495         igb_init(adapter);
4496 }
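/*
** A worked example of the VFTA indexing above: for vtag 1000,
** index = (1000 >> 5) & 0x7F = 31 and bit = 1000 & 0x1F = 8,
** so VLAN 1000 is bit 8 of igb_shadow_vfta[31]. The 128
** 32-bit entries together cover all 4096 possible VLAN IDs.
*/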
4497
4498 /*
4499  * This routine is run via a vlan
4500  * unconfig EVENT
4501  */
4502 static void
4503 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4504 {
4505         struct adapter  *adapter = ifp->if_softc;
4506         u32             index, bit;
4507
4508         if (ifp->if_softc != arg)
4509                 return;
4510
4511         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4512                 return;
4513
4514         index = (vtag >> 5) & 0x7F;
4515         bit = vtag & 0x1F;
4516         igb_shadow_vfta[index] &= ~(1 << bit);
4517         --adapter->num_vlans;
4518         /* Re-init to load the changes */
4519         igb_init(adapter);
4520 }
4521
4522 static void
4523 igb_setup_vlan_hw_support(struct adapter *adapter)
4524 {
4525         struct e1000_hw *hw = &adapter->hw;
4526         u32             reg;
4527
4528         /*
4529         ** We get here through init_locked, meaning
4530         ** a soft reset; this has already cleared
4531         ** the VFTA and other state, so if no
4532         ** VLANs have been registered, do nothing.
4533         */
4534         if (adapter->num_vlans == 0)
4535                 return;
4536
4537         /*
4538         ** A soft reset zeroes out the VFTA, so
4539         ** we need to repopulate it now.
4540         */
4541         for (int i = 0; i < IGB_VFTA_SIZE; i++)
4542                 if (igb_shadow_vfta[i] != 0) {
4543                         if (hw->mac.type == e1000_vfadapt)
4544                                 e1000_vfta_set_vf(hw, igb_shadow_vfta[i], TRUE);
4545                         else
4546                                 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4547                                  i, igb_shadow_vfta[i]);
4548                 }
4549
4550         if (hw->mac.type == e1000_vfadapt)
4551                 e1000_rlpml_set_vf(hw,
4552                     adapter->max_frame_size + VLAN_TAG_SIZE);
4553         else {
4554                 reg = E1000_READ_REG(hw, E1000_CTRL);
4555                 reg |= E1000_CTRL_VME;
4556                 E1000_WRITE_REG(hw, E1000_CTRL, reg);
4557
4558                 /* Enable the Filter Table */
4559                 reg = E1000_READ_REG(hw, E1000_RCTL);
4560                 reg &= ~E1000_RCTL_CFIEN;
4561                 reg |= E1000_RCTL_VFE;
4562                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
4563
4564                 /* Update the frame size */
4565                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4566                     adapter->max_frame_size + VLAN_TAG_SIZE);
4567         }
4568 }
4569
4570 static void
4571 igb_enable_intr(struct adapter *adapter)
4572 {
4573         /* In MSIX mode, set which interrupts auto-clear */
4574         if (adapter->msix_mem) {
4575                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4576                     adapter->eims_mask);
4577                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4578                     adapter->eims_mask);
4579                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4580                     adapter->eims_mask);
4581                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4582                     E1000_IMS_LSC);
4583         } else {
4584                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4585                     IMS_ENABLE_MASK);
4586         }
4587         E1000_WRITE_FLUSH(&adapter->hw);
4588
4589         return;
4590 }
4591
4592 static void
4593 igb_disable_intr(struct adapter *adapter)
4594 {
4595         if (adapter->msix_mem) {
4596                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4597                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4598         } 
4599         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4600         E1000_WRITE_FLUSH(&adapter->hw);
4601         return;
4602 }
4603
4604 /*
4605  * Bit of a misnomer: what this really means is
4606  * to enable OS management of the system, i.e.
4607  * to disable certain hardware management features.
4608  */
4609 static void
4610 igb_init_manageability(struct adapter *adapter)
4611 {
4612         if (adapter->has_manage) {
4613                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4614                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4615
4616                 /* disable hardware interception of ARP */
4617                 manc &= ~(E1000_MANC_ARP_EN);
4618
4619                 /* enable receiving management packets to the host */
4620                 manc |= E1000_MANC_EN_MNG2HOST;
4621                 manc2h |= 1 << 5;  /* Mng Port 623 */
4622                 manc2h |= 1 << 6;  /* Mng Port 664 */
4623                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4624                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4625         }
4626 }
4627
4628 /*
4629  * Give control back to hardware management
4630  * controller if there is one.
4631  */
4632 static void
4633 igb_release_manageability(struct adapter *adapter)
4634 {
4635         if (adapter->has_manage) {
4636                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4637
4638                 /* re-enable hardware interception of ARP */
4639                 manc |= E1000_MANC_ARP_EN;
4640                 manc &= ~E1000_MANC_EN_MNG2HOST;
4641
4642                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4643         }
4644 }
4645
4646 /*
4647  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4648  * For ASF and Pass Through versions of f/w this means that
4649  * the driver is loaded. 
4650  *
4651  */
4652 static void
4653 igb_get_hw_control(struct adapter *adapter)
4654 {
4655         u32 ctrl_ext;
4656
4657         if (adapter->hw.mac.type == e1000_vfadapt)
4658                 return;
4659
4660         /* Let firmware know the driver has taken over */
4661         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4662         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4663             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4664 }
4665
4666 /*
4667  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4668  * For ASF and Pass Through versions of f/w this means that the
4669  * driver is no longer loaded.
4670  *
4671  */
4672 static void
4673 igb_release_hw_control(struct adapter *adapter)
4674 {
4675         u32 ctrl_ext;
4676
4677         if (adapter->hw.mac.type == e1000_vfadapt)
4678                 return;
4679
4680         /* Let firmware take over control of h/w */
4681         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4682         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4683             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4684 }
4685
4686 static int
4687 igb_is_valid_ether_addr(uint8_t *addr)
4688 {
4689         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4690
4691         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4692                 return (FALSE);
4693         }
4694
4695         return (TRUE);
4696 }
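/*
** The addr[0] & 1 test rejects any address with the I/G bit set,
** i.e. multicast and broadcast addresses such as 01:00:5e:00:00:01
** or ff:ff:ff:ff:ff:ff, while the bcmp() rejects the all-zero
** address; anything else is a usable unicast MAC.
*/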
4697
4698
4699 /*
4700  * Enable PCI Wake On Lan capability
4701  */
4702 static void
4703 igb_enable_wakeup(device_t dev)
4704 {
4705         u16     cap, status;
4706         u8      id;
4707
4708         /* First find the capabilities pointer*/
4709         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4710         /* Read the PM Capabilities */
4711         id = pci_read_config(dev, cap, 1);
4712         if (id != PCIY_PMG)     /* Something wrong */
4713                 return;
4714         /* OK, we have the power capabilities, so
4715            now get the status register */
4716         cap += PCIR_POWER_STATUS;
4717         status = pci_read_config(dev, cap, 2);
4718         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4719         pci_write_config(dev, cap, status, 2);
4720         return;
4721 }
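/*
** The walk above only inspects the first entry in the capability
** list, so it assumes power management is the first capability
** these adapters advertise. A sketch of the more general lookup,
** using the pci(9) helper available on this branch:
**
**      int pmc;
**
**      if (pci_find_extcap(dev, PCIY_PMG, &pmc) == 0) {
**              status = pci_read_config(dev,
**                  pmc + PCIR_POWER_STATUS, 2);
**              status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
**              pci_write_config(dev, pmc + PCIR_POWER_STATUS,
**                  status, 2);
**      }
*/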
4722
4723 static void
4724 igb_led_func(void *arg, int onoff)
4725 {
4726         struct adapter  *adapter = arg;
4727
4728         IGB_CORE_LOCK(adapter);
4729         if (onoff) {
4730                 e1000_setup_led(&adapter->hw);
4731                 e1000_led_on(&adapter->hw);
4732         } else {
4733                 e1000_led_off(&adapter->hw);
4734                 e1000_cleanup_led(&adapter->hw);
4735         }
4736         IGB_CORE_UNLOCK(adapter);
4737 }
4738
4739 /**********************************************************************
4740  *
4741  *  Update the board statistics counters.
4742  *
4743  **********************************************************************/
4744 static void
4745 igb_update_stats_counters(struct adapter *adapter)
4746 {
4747         struct ifnet            *ifp;
4748         struct e1000_hw         *hw = &adapter->hw;
4749         struct e1000_hw_stats   *stats;
4750
4751         /* 
4752         ** The virtual function adapter has only a
4753         ** small controlled set of stats, do only 
4754         ** those and return.
4755         */
4756         if (adapter->hw.mac.type == e1000_vfadapt) {
4757                 igb_update_vf_stats_counters(adapter);
4758                 return;
4759         }
4760
4761         stats = (struct e1000_hw_stats  *)adapter->stats;
4762
4763         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4764            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4765                 stats->symerrs +=
4766                     E1000_READ_REG(hw, E1000_SYMERRS);
4767                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
4768         }
4769
4770         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4771         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4772         stats->scc += E1000_READ_REG(hw, E1000_SCC);
4773         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4774
4775         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4776         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4777         stats->colc += E1000_READ_REG(hw, E1000_COLC);
4778         stats->dc += E1000_READ_REG(hw, E1000_DC);
4779         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4780         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4781         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4782         stats->xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC);
4783         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4784         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4785         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4786         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4787         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4788         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4789         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4790         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4791         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4792         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4793         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4794         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4795
4796         /* For the 64-bit byte counters the low dword must be read first. */
4797         /* Both registers clear on the read of the high dword */
4798
4799         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4800           ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4801         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4802           ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
4803
4804         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4805         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4806         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4807         stats->roc += E1000_READ_REG(hw, E1000_ROC);
4808         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4809
        /* These two follow the same low/high pattern as above */
        stats->tor += E1000_READ_REG(hw, E1000_TORL) +
            ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
        stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
            ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
4812
4813         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4814         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
4815         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
4816         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
4817         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
4818         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
4819         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
4820         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
4821         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
4822         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
4823
4824         /* Interrupt Counts */
4825
4826         stats->iac += E1000_READ_REG(hw, E1000_IAC);
4827         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
4828         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
4829         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
4830         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
4831         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
4832         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
4833         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
4834         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
4835
4836         /* Host to Card Statistics */
4837
4838         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
4839         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
4840         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
4841         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
4842         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
4843         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
4844         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
4845         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
4846             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
4847         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
4848             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
4849         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
4850         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
4851         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
4852
4853         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
4854         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
4855         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
4856         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
4857         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
4858         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
4859
4860         ifp = adapter->ifp;
4861         ifp->if_collisions = stats->colc;
4862
4863         /* Rx Errors */
4864         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
4865             stats->crcerrs + stats->algnerrc +
4866             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
4867
4868         /* Tx Errors */
4869         ifp->if_oerrors = stats->ecol +
4870             stats->latecol + adapter->watchdog_events;
4871
4872         /* Driver specific counters */
4873         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
4874         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
4875         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
4876         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
4877         adapter->packet_buf_alloc_tx =
4878             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
4879         adapter->packet_buf_alloc_rx =
4880             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
4881 }
4882
4883
4884 /**********************************************************************
4885  *
4886  *  Initialize the VF board statistics counters.
4887  *
4888  **********************************************************************/
4889 static void
4890 igb_vf_init_stats(struct adapter *adapter)
4891 {
4892         struct e1000_hw *hw = &adapter->hw;
4893         struct e1000_vf_stats   *stats;
4894
4895         stats = (struct e1000_vf_stats  *)adapter->stats;
4896
4897         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
4898         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
4899         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
4900         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
4901         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
4902 }
4903  
4904 /**********************************************************************
4905  *
4906  *  Update the VF board statistics counters.
4907  *
4908  **********************************************************************/
4909 static void
4910 igb_update_vf_stats_counters(struct adapter *adapter)
4911 {
4912         struct e1000_hw *hw = &adapter->hw;
4913         struct e1000_vf_stats   *stats;
4914
4915         if (adapter->link_speed == 0)
4916                 return;
4917
4918         stats = (struct e1000_vf_stats  *)adapter->stats;
4919
4920         UPDATE_VF_REG(E1000_VFGPRC,
4921             stats->last_gprc, stats->gprc);
4922         UPDATE_VF_REG(E1000_VFGORC,
4923             stats->last_gorc, stats->gorc);
4924         UPDATE_VF_REG(E1000_VFGPTC,
4925             stats->last_gptc, stats->gptc);
4926         UPDATE_VF_REG(E1000_VFGOTC,
4927             stats->last_gotc, stats->gotc);
4928         UPDATE_VF_REG(E1000_VFMPRC,
4929             stats->last_mprc, stats->mprc);
4930 }
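/*
** UPDATE_VF_REG comes from the driver header; the idea (a sketch,
** the exact macro may differ) is to widen the free-running 32-bit
** hardware counter into a 64-bit accumulator by watching for
** rollover between polls:
**
**      u32 cur = E1000_READ_REG(hw, reg);
**      if (cur < last)                 /* counter wrapped */
**              count += (u64)1 << 32;
**      count = (count & 0xFFFFFFFF00000000ULL) | cur;
**      last = cur;
*/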
4931
4932
4933 /** igb_sysctl_tdh_handler - Handler function
4934  *  Retrieves the TDH value from the hardware
4935  */
4936 static int 
4937 igb_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
4938 {
4939         int error;
4940
4941         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
4942         if (!txr) return 0;
4943
4944         unsigned val = E1000_READ_REG(&txr->adapter->hw, E1000_TDH(txr->me));
4945         error = sysctl_handle_int(oidp, &val, 0, req);
4946         if (error || !req->newptr)
4947                 return error;
4948         return 0;
4949 }
4950
4951 /** igb_sysctl_tdt_handler - Handler function
4952  *  Retrieves the TDT value from the hardware
4953  */
4954 static int 
4955 igb_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
4956 {
4957         int error;
4958
4959         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
4960         if (!txr) return 0;
4961
4962         unsigned val = E1000_READ_REG(&txr->adapter->hw, E1000_TDT(txr->me));
4963         error = sysctl_handle_int(oidp, &val, 0, req);
4964         if (error || !req->newptr)
4965                 return error;
4966         return 0;
4967 }
4968
4969 /** igb_sysctl_rdh_handler - Handler function
4970  *  Retrieves the RDH value from the hardware
4971  */
4972 static int 
4973 igb_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
4974 {
4975         int error;
4976
4977         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
4978         if (!rxr) return 0;
4979
4980         unsigned val = E1000_READ_REG(&rxr->adapter->hw, E1000_RDH(rxr->me));
4981         error = sysctl_handle_int(oidp, &val, 0, req);
4982         if (error || !req->newptr)
4983                 return error;
4984         return 0;
4985 }
4986
4987 /** igb_sysctl_rdt_handler - Handler function
4988  *  Retrieves the RDT value from the hardware
4989  */
4990 static int 
4991 igb_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
4992 {
4993         int error;
4994
4995         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
4996         if (!rxr) return 0;
4997
4998         unsigned val = E1000_READ_REG(&rxr->adapter->hw, E1000_RDT(rxr->me));
4999         error = sysctl_handle_int(oidp, &val, 0, req);
5000         if (error || !req->newptr)
5001                 return error;
5002         return 0;
5003 }
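/*
** The four handlers above are attached per queue by
** igb_add_hw_stats() below, so the live ring indices can be read
** from userland, e.g. (unit and queue numbers for illustration):
**
**      # sysctl dev.igb.0.queue0.txd_head
**      # sysctl dev.igb.0.queue0.rxd_tail
*/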
5004
5005 /*
5006  * Add sysctl variables, one per statistic, to the system.
5007  */
5008 static void
5009 igb_add_hw_stats(struct adapter *adapter)
5010 {
5011
5012         device_t dev = adapter->dev;
5013
5014         struct tx_ring *txr = adapter->tx_rings;
5015         struct rx_ring *rxr = adapter->rx_rings;
5016
5017         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5018         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5019         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5020         struct e1000_hw_stats *stats = adapter->stats;
5021
5022         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5023         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5024
5025 #define QUEUE_NAME_LEN 32
5026         char namebuf[QUEUE_NAME_LEN];
5027
5028         /* Driver Statistics */
5029         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5030                         CTLFLAG_RD, &adapter->link_irq, 0,
5031                         "Link MSIX IRQ Handled");
5032         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5033                         CTLFLAG_RD, &adapter->dropped_pkts,
5034                         "Driver dropped packets");
5035         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5036                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5037                         "Driver tx dma failure in xmit");
5038
5039         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5040                         CTLFLAG_RD, &adapter->device_control,
5041                         "Device Control Register");
5042         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5043                         CTLFLAG_RD, &adapter->rx_control,
5044                         "Receiver Control Register");
5045         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5046                         CTLFLAG_RD, &adapter->int_mask,
5047                         "Interrupt Mask");
5048         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5049                         CTLFLAG_RD, &adapter->eint_mask,
5050                         "Extended Interrupt Mask");
5051         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5052                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5053                         "Transmit Buffer Packet Allocation");
5054         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5055                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5056                         "Receive Buffer Packet Allocation");
5057         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5058                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5059                         "Flow Control High Watermark");
5060         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5061                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5062                         "Flow Control Low Watermark");
5063
5064         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5065                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5066                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5067                                             CTLFLAG_RD, NULL, "Queue Name");
5068                 queue_list = SYSCTL_CHILDREN(queue_node);
5069
5070                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5071                                 CTLFLAG_RD, txr, sizeof(txr),
5072                                 igb_sysctl_tdh_handler, "IU",
5073                                 "Transmit Descriptor Head");
5074                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5075                                 CTLFLAG_RD, txr, sizeof(txr),
5076                                 igb_sysctl_tdt_handler, "IU",
5077                                 "Transmit Descriptor Tail");
5078                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5079                                 CTLFLAG_RD, &txr->no_desc_avail,
5080                                 "Queue No Descriptor Available");
5081                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5082                                 CTLFLAG_RD, &txr->tx_packets,
5083                                 "Queue Packets Transmitted");
5084         }
5085
5086         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5087                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5088                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5089                                             CTLFLAG_RD, NULL, "Queue Name");
5090                 queue_list = SYSCTL_CHILDREN(queue_node);
5091
5092                 struct lro_ctrl *lro = &rxr->lro;
5093
5098
5099                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5100                                 CTLFLAG_RD, rxr, sizeof(rxr),
5101                                 igb_sysctl_rdh_handler, "IU",
5102                                 "Receive Descriptor Head");
5103                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5104                                 CTLFLAG_RD, rxr, sizeof(rxr),
5105                                 igb_sysctl_rdt_handler, "IU",
5106                                 "Receive Descriptor Tail");
5107                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5108                                 CTLFLAG_RD, &rxr->rx_packets,
5109                                 "Queue Packets Received");
5110                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5111                                 CTLFLAG_RD, &rxr->rx_bytes,
5112                                 "Queue Bytes Received");
5113                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5114                                 CTLFLAG_RD, &lro->lro_queued, 0,
5115                                 "LRO Queued");
5116                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5117                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5118                                 "LRO Flushed");
5119         }
5120
5121         /* MAC stats get their own sub node */
5122
5123         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5124                                     CTLFLAG_RD, NULL, "MAC Statistics");
5125         stat_list = SYSCTL_CHILDREN(stat_node);
5126
5127         /*
5128         ** VF adapter has a very limited set of stats
5129         ** since it's not managing the metal, so to speak.
5130         */
5131         if (adapter->hw.mac.type == e1000_vfadapt) {
5132                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5133                                 CTLFLAG_RD, &stats->gprc,
5134                                 "Good Packets Received");
5135                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5136                                 CTLFLAG_RD, &stats->gptc,
5137                                 "Good Packets Transmitted");
5138                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5139                                 CTLFLAG_RD, &stats->gorc,
5140                                 "Good Octets Received");
5141                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5142                                 CTLFLAG_RD, &stats->gotc,
5143                                 "Good Octets Transmitted");
5144                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5145                                 CTLFLAG_RD, &stats->mprc,
5146                                 "Multicast Packets Received");
5147                 return;
5148         }
5149
5150         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5151                         CTLFLAG_RD, &stats->ecol,
5152                         "Excessive collisions");
5153         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5154                         CTLFLAG_RD, &stats->scc,
5155                         "Single collisions");
5156         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5157                         CTLFLAG_RD, &stats->mcc,
5158                         "Multiple collisions");
5159         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5160                         CTLFLAG_RD, &stats->latecol,
5161                         "Late collisions");
5162         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5163                         CTLFLAG_RD, &stats->colc,
5164                         "Collision Count");
5165         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5166                         CTLFLAG_RD, &stats->symerrs,
5167                         "Symbol Errors");
5168         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5169                         CTLFLAG_RD, &stats->sec,
5170                         "Sequence Errors");
5171         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5172                         CTLFLAG_RD, &stats->dc,
5173                         "Defer Count");
5174         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5175                         CTLFLAG_RD, &stats->mpc,
5176                         "Missed Packets");
5177         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5178                         CTLFLAG_RD, &stats->rnbc,
5179                         "Receive No Buffers");
5180         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5181                         CTLFLAG_RD, &stats->ruc,
5182                         "Receive Undersize");
5183         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5184                         CTLFLAG_RD, &stats->rfc,
5185                         "Fragmented Packets Received");
5186         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5187                         CTLFLAG_RD, &stats->roc,
5188                         "Oversized Packets Received");
5189         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5190                         CTLFLAG_RD, &stats->rjc,
5191                         "Received Jabber");
5192         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5193                         CTLFLAG_RD, &stats->rxerrc,
5194                         "Receive Errors");
5195         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5196                         CTLFLAG_RD, &stats->crcerrs,
5197                         "CRC errors");
5198         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5199                         CTLFLAG_RD, &stats->algnerrc,
5200                         "Alignment Errors");
5201         /* On 82575 these are collision counts */
5202         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5203                         CTLFLAG_RD, &stats->cexterr,
5204                         "Collision/Carrier extension errors");
5205         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_overruns",
5206                         CTLFLAG_RD, &adapter->rx_overruns,
5207                         "RX overruns");
5208         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "watchdog_timeouts",
5209                         CTLFLAG_RD, &adapter->watchdog_events,
5210                         "Watchdog timeouts");
5211         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5212                         CTLFLAG_RD, &stats->xonrxc,
5213                         "XON Received");
5214         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5215                         CTLFLAG_RD, &stats->xontxc,
5216                         "XON Transmitted");
5217         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5218                         CTLFLAG_RD, &stats->xoffrxc,
5219                         "XOFF Received");
5220         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5221                         CTLFLAG_RD, &stats->xofftxc,
5222                         "XOFF Transmitted");
5223         /* Packet Reception Stats */
5224         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5225                         CTLFLAG_RD, &stats->tpr,
5226                         "Total Packets Received");
5227         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5228                         CTLFLAG_RD, &stats->gprc,
5229                         "Good Packets Received");
5230         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5231                         CTLFLAG_RD, &stats->bprc,
5232                         "Broadcast Packets Received");
5233         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5234                         CTLFLAG_RD, &stats->mprc,
5235                         "Multicast Packets Received");
5236         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5237                         CTLFLAG_RD, &stats->prc64,
5238                         "64 byte frames received");
5239         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5240                         CTLFLAG_RD, &stats->prc127,
5241                         "65-127 byte frames received");
5242         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5243                         CTLFLAG_RD, &stats->prc255,
5244                         "128-255 byte frames received");
5245         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5246                         CTLFLAG_RD, &stats->prc511,
5247                         "256-511 byte frames received");
5248         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5249                         CTLFLAG_RD, &stats->prc1023,
5250                         "512-1023 byte frames received");
5251         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5252                         CTLFLAG_RD, &stats->prc1522,
5253                         "1024-1522 byte frames received");
5254         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5255                         CTLFLAG_RD, &stats->gorc, 
5256                         "Good Octets Received"); 
5257
5258         /* Packet Transmission Stats */
5259         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5260                         CTLFLAG_RD, &stats->gotc,
5261                         "Good Octets Transmitted");
5262         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5263                         CTLFLAG_RD, &stats->tpt,
5264                         "Total Packets Transmitted");
5265         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5266                         CTLFLAG_RD, &stats->gptc,
5267                         "Good Packets Transmitted");
5268         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5269                         CTLFLAG_RD, &stats->bptc,
5270                         "Broadcast Packets Transmitted");
5271         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5272                         CTLFLAG_RD, &stats->mptc,
5273                         "Multicast Packets Transmitted");
5274         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5275                         CTLFLAG_RD, &stats->ptc64,
5276                         "64 byte frames transmitted");
5277         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5278                         CTLFLAG_RD, &stats->ptc127,
5279                         "65-127 byte frames transmitted");
5280         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5281                         CTLFLAG_RD, &stats->ptc255,
5282                         "128-255 byte frames transmitted");
5283         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5284                         CTLFLAG_RD, &stats->ptc511,
5285                         "256-511 byte frames transmitted");
5286         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5287                         CTLFLAG_RD, &stats->ptc1023,
5288                         "512-1023 byte frames transmitted");
5289         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5290                         CTLFLAG_RD, &stats->ptc1522,
5291                         "1024-1522 byte frames transmitted");
5292         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5293                         CTLFLAG_RD, &stats->tsctc,
5294                         "TSO Contexts Transmitted");
5295         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5296                         CTLFLAG_RD, &stats->tsctfc,
5297                         "TSO Contexts Failed");
5298
5299
5300         /* Interrupt Stats */
5301
5302         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5303                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5304         int_list = SYSCTL_CHILDREN(int_node);
5305
5306         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5307                         CTLFLAG_RD, &stats->iac,
5308                         "Interrupt Assertion Count");
5309
5310         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5311                         CTLFLAG_RD, &stats->icrxptc,
5312                         "Interrupt Cause Rx Pkt Timer Expire Count");
5313
5314         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5315                         CTLFLAG_RD, &stats->icrxatc,
5316                         "Interrupt Cause Rx Abs Timer Expire Count");
5317
5318         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5319                         CTLFLAG_RD, &stats->ictxptc,
5320                         "Interrupt Cause Tx Pkt Timer Expire Count");
5321
5322         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5323                         CTLFLAG_RD, &stats->ictxatc,
5324                         "Interrupt Cause Tx Abs Timer Expire Count");
5325
5326         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5327                         CTLFLAG_RD, &stats->ictxqec,
5328                         "Interrupt Cause Tx Queue Empty Count");
5329
5330         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5331                         CTLFLAG_RD, &stats->ictxqmtc,
5332                         "Interrupt Cause Tx Queue Min Thresh Count");
5333
5334         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5335                         CTLFLAG_RD, &stats->icrxdmtc,
5336                         "Interrupt Cause Rx Desc Min Thresh Count");
5337
5338         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5339                         CTLFLAG_RD, &stats->icrxoc,
5340                         "Interrupt Cause Receiver Overrun Count");
5341
5342         /* Host to Card Stats */
5343
5344         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5345                                     CTLFLAG_RD, NULL, 
5346                                     "Host to Card Statistics");
5347
5348         host_list = SYSCTL_CHILDREN(host_node);
5349
5350         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5351                         CTLFLAG_RD, &stats->cbtmpc,
5352                         "Circuit Breaker Tx Packet Count");
5353
5354         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5355                         CTLFLAG_RD, &stats->htdpmc,
5356                         "Host Transmit Discarded Packets");
5357
5358         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5359                         CTLFLAG_RD, &stats->rpthc,
5360                         "Rx Packets To Host");
5361
5362         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5363                         CTLFLAG_RD, &stats->cbrmpc,
5364                         "Circuit Breaker Rx Packet Count");
5365
5366         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5367                         CTLFLAG_RD, &stats->cbrdpc,
5368                         "Circuit Breaker Rx Dropped Count");
5369
5370         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5371                         CTLFLAG_RD, &stats->hgptc,
5372                         "Host Good Packets Tx Count");
5373
5374         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5375                         CTLFLAG_RD, &stats->htcbdpc,
5376                         "Host Tx Circuit Breaker Dropped Count");
5377
5378         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5379                         CTLFLAG_RD, &stats->hgorc,
5380                         "Host Good Octets Received Count");
5381
5382         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5383                         CTLFLAG_RD, &stats->hgotc,
5384                         "Host Good Octets Transmit Count");
5385
5386         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5387                         CTLFLAG_RD, &stats->lenerrs,
5388                         "Length Errors");
5389
5390         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5391                         CTLFLAG_RD, &stats->scvpc,
5392                         "SerDes/SGMII Code Violation Pkt Count");
5393
5394         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5395                         CTLFLAG_RD, &stats->hrmpc,
5396                         "Header Redirection Missed Packet Count");
5397 }
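/*
** For reference, everything registered above hangs off the
** device's sysctl node, so (unit number for illustration) the
** resulting OIDs look like:
**
**      dev.igb.0.dropped
**      dev.igb.0.queue0.tx_packets
**      dev.igb.0.mac_stats.good_pkts_recvd
**      dev.igb.0.interrupts.asserts
**      dev.igb.0.host.rx_pkt
*/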
5398
5399
5400 /**********************************************************************
5401  *
5402  *  This routine provides a way to dump out the adapter eeprom,
5403  *  often a useful debug/service tool. Only the first 32 words
5404  *  are dumped; everything of interest lies in that range.
5405  *
5406  **********************************************************************/
5407 static int
5408 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5409 {
5410         struct adapter *adapter;
5411         int error;
5412         int result;
5413
5414         result = -1;
5415         error = sysctl_handle_int(oidp, &result, 0, req);
5416
5417         if (error || !req->newptr)
5418                 return (error);
5419
5420         /*
5421          * This value will cause a hex dump of the
5422          * first 32 16-bit words of the EEPROM to
5423          * the screen.
5424          */
5425         if (result == 1) {
5426                 adapter = (struct adapter *)arg1;
5427                 igb_print_nvm_info(adapter);
5428         }
5429
5430         return (error);
5431 }
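/*
** Usage sketch: writing 1 to whatever OID this handler is attached
** to (registered elsewhere in the driver) triggers the dump; e.g.,
** assuming the OID is named "nvm" on unit 0:
**
**      # sysctl dev.igb.0.nvm=1
**
** The output goes to the console via igb_print_nvm_info() below.
*/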
5432
5433 static void
5434 igb_print_nvm_info(struct adapter *adapter)
5435 {
5436         u16     eeprom_data;
5437         int     i, j, row = 0;
5438
5439         /* It's a bit crude, but it gets the job done */
5440         printf("\nInterface EEPROM Dump:\n");
5441         printf("Offset\n0x0000  ");
5442         for (i = 0, j = 0; i < 32; i++, j++) {
5443                 if (j == 8) { /* Make the offset block */
5444                         j = 0; ++row;
5445                         printf("\n0x00%x0  ", row);
5446                 }
5447                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5448                 printf("%04x ", eeprom_data);
5449         }
5450         printf("\n");
5451 }
5452
5453 static void
5454 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5455         const char *description, int *limit, int value)
5456 {
5457         *limit = value;
5458         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5459             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5460             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5461 }
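/*
** For illustration, this helper is invoked once at attach time
** along these lines (names as used elsewhere in this driver):
**
**      igb_add_rx_process_limit(adapter, "rx_processing_limit",
**          "max number of rx packets to process",
**          &adapter->rx_process_limit, igb_rx_process_limit);
**
** which both records the default and exposes the limit as a
** read/write sysctl under dev.igb.<unit>.
*/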