1 /******************************************************************************
2
3   Copyright (c) 2001-2010, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifdef HAVE_KERNEL_OPTION_HEADERS
37 #include "opt_device_polling.h"
38 #include "opt_inet.h"
39 #include "opt_altq.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
46 #endif
47 #include <sys/bus.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rman.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
60 #include <sys/pcpu.h>
61 #include <sys/smp.h>
62 #include <machine/smp.h>
63 #include <machine/bus.h>
64 #include <machine/resource.h>
65
66 #include <net/bpf.h>
67 #include <net/ethernet.h>
68 #include <net/if.h>
69 #include <net/if_arp.h>
70 #include <net/if_dl.h>
71 #include <net/if_media.h>
72
73 #include <net/if_types.h>
74 #include <net/if_vlan_var.h>
75
76 #include <netinet/in_systm.h>
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip6.h>
81 #include <netinet/tcp.h>
82 #include <netinet/tcp_lro.h>
83 #include <netinet/udp.h>
84
85 #include <machine/in_cksum.h>
86 #include <dev/led/led.h>
87 #include <dev/pci/pcivar.h>
88 #include <dev/pci/pcireg.h>
89
90 #include "e1000_api.h"
91 #include "e1000_82575.h"
92 #include "if_igb.h"
93
94 /*********************************************************************
95  *  Set this to one to display debug statistics
96  *********************************************************************/
97 int     igb_display_debug_stats = 0;
98
99 /*********************************************************************
100  *  Driver version:
101  *********************************************************************/
102 char igb_driver_version[] = "version - 1.9.5";
103
104
105 /*********************************************************************
106  *  PCI Device ID Table
107  *
108  *  Used by probe to select devices to load on
109  *  Last field stores an index into e1000_strings
110  *  Last entry must be all 0s
111  *
112  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113  *********************************************************************/
114
115 static igb_vendor_info_t igb_vendor_info_array[] =
116 {
117         { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
132         { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
136                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
137         /* required last entry */
138         { 0, 0, 0, 0, 0}
139 };
140
141 /*********************************************************************
142  *  Table of branding strings for all supported NICs.
143  *********************************************************************/
144
145 static char *igb_strings[] = {
146         "Intel(R) PRO/1000 Network Connection"
147 };
148
149 /*********************************************************************
150  *  Function prototypes
151  *********************************************************************/
152 static int      igb_probe(device_t);
153 static int      igb_attach(device_t);
154 static int      igb_detach(device_t);
155 static int      igb_shutdown(device_t);
156 static int      igb_suspend(device_t);
157 static int      igb_resume(device_t);
158 static void     igb_start(struct ifnet *);
159 static void     igb_start_locked(struct tx_ring *, struct ifnet *);
160 #if __FreeBSD_version >= 800000
161 static int      igb_mq_start(struct ifnet *, struct mbuf *);
162 static int      igb_mq_start_locked(struct ifnet *,
163                     struct tx_ring *, struct mbuf *);
164 static void     igb_qflush(struct ifnet *);
165 #endif
166 static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
167 static void     igb_init(void *);
168 static void     igb_init_locked(struct adapter *);
169 static void     igb_stop(void *);
170 static void     igb_media_status(struct ifnet *, struct ifmediareq *);
171 static int      igb_media_change(struct ifnet *);
172 static void     igb_identify_hardware(struct adapter *);
173 static int      igb_allocate_pci_resources(struct adapter *);
174 static int      igb_allocate_msix(struct adapter *);
175 static int      igb_allocate_legacy(struct adapter *);
176 static int      igb_setup_msix(struct adapter *);
177 static void     igb_free_pci_resources(struct adapter *);
178 static void     igb_local_timer(void *);
179 static void     igb_reset(struct adapter *);
180 static void     igb_setup_interface(device_t, struct adapter *);
181 static int      igb_allocate_queues(struct adapter *);
182 static void     igb_configure_queues(struct adapter *);
183
184 static int      igb_allocate_transmit_buffers(struct tx_ring *);
185 static void     igb_setup_transmit_structures(struct adapter *);
186 static void     igb_setup_transmit_ring(struct tx_ring *);
187 static void     igb_initialize_transmit_units(struct adapter *);
188 static void     igb_free_transmit_structures(struct adapter *);
189 static void     igb_free_transmit_buffers(struct tx_ring *);
190
191 static int      igb_allocate_receive_buffers(struct rx_ring *);
192 static int      igb_setup_receive_structures(struct adapter *);
193 static int      igb_setup_receive_ring(struct rx_ring *);
194 static void     igb_initialize_receive_units(struct adapter *);
195 static void     igb_free_receive_structures(struct adapter *);
196 static void     igb_free_receive_buffers(struct rx_ring *);
197 static void     igb_free_receive_ring(struct rx_ring *);
198
199 static void     igb_enable_intr(struct adapter *);
200 static void     igb_disable_intr(struct adapter *);
201 static void     igb_update_stats_counters(struct adapter *);
202 static bool     igb_txeof(struct tx_ring *);
203
204 static __inline void igb_rx_discard(struct rx_ring *, int);
205 static __inline void igb_rx_input(struct rx_ring *,
206                     struct ifnet *, struct mbuf *, u32);
207
208 static bool     igb_rxeof(struct igb_queue *, int);
209 static void     igb_rx_checksum(u32, struct mbuf *, u32);
210 static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
211 static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
212 static void     igb_set_promisc(struct adapter *);
213 static void     igb_disable_promisc(struct adapter *);
214 static void     igb_set_multi(struct adapter *);
215 static void     igb_print_hw_stats(struct adapter *);
216 static void     igb_update_link_status(struct adapter *);
217 static void     igb_refresh_mbufs(struct rx_ring *, int);
218
219 static void     igb_register_vlan(void *, struct ifnet *, u16);
220 static void     igb_unregister_vlan(void *, struct ifnet *, u16);
221 static void     igb_setup_vlan_hw_support(struct adapter *);
222
223 static int      igb_xmit(struct tx_ring *, struct mbuf **);
224 static int      igb_dma_malloc(struct adapter *, bus_size_t,
225                     struct igb_dma_alloc *, int);
226 static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
227 static void     igb_print_debug_info(struct adapter *);
228 static void     igb_print_nvm_info(struct adapter *);
229 static int      igb_is_valid_ether_addr(u8 *);
230 static int      igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
231 static int      igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
232 /* Management and WOL Support */
233 static void     igb_init_manageability(struct adapter *);
234 static void     igb_release_manageability(struct adapter *);
235 static void     igb_get_hw_control(struct adapter *);
236 static void     igb_release_hw_control(struct adapter *);
237 static void     igb_enable_wakeup(device_t);
238 static void     igb_led_func(void *, int);
239
240 static int      igb_irq_fast(void *);
241 static void     igb_add_rx_process_limit(struct adapter *, const char *,
242                     const char *, int *, int);
243 static void     igb_handle_rxtx(void *context, int pending);
244 static void     igb_handle_que(void *context, int pending);
245 static void     igb_handle_link(void *context, int pending);
246
247 /* These are MSIX only irq handlers */
248 static void     igb_msix_que(void *);
249 static void     igb_msix_link(void *);
250
251 #ifdef DEVICE_POLLING
252 static poll_handler_t igb_poll;
253 #endif /* DEVICE_POLLING */
254
255 /*********************************************************************
256  *  FreeBSD Device Interface Entry Points
257  *********************************************************************/
258
259 static device_method_t igb_methods[] = {
260         /* Device interface */
261         DEVMETHOD(device_probe, igb_probe),
262         DEVMETHOD(device_attach, igb_attach),
263         DEVMETHOD(device_detach, igb_detach),
264         DEVMETHOD(device_shutdown, igb_shutdown),
265         DEVMETHOD(device_suspend, igb_suspend),
266         DEVMETHOD(device_resume, igb_resume),
267         {0, 0}
268 };
269
270 static driver_t igb_driver = {
271         "igb", igb_methods, sizeof(struct adapter),
272 };
273
274 static devclass_t igb_devclass;
275 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
276 MODULE_DEPEND(igb, pci, 1, 1, 1);
277 MODULE_DEPEND(igb, ether, 1, 1, 1);
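
/*
 * Usage sketch: when built as a module, this driver is if_igb.ko.
 * It can be loaded at runtime with "kldload if_igb", or at boot by
 * adding if_igb_load="YES" to /boot/loader.conf.
 */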
278
279 /*********************************************************************
280  *  Tunable default values.
281  *********************************************************************/
282
283 /* Descriptor defaults */
284 static int igb_rxd = IGB_DEFAULT_RXD;
285 static int igb_txd = IGB_DEFAULT_TXD;
286 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
287 TUNABLE_INT("hw.igb.txd", &igb_txd);
288
289 /*
290 ** AIM: Adaptive Interrupt Moderation,
291 ** which means that the interrupt rate is
292 ** varied over time based on the traffic
293 ** seen by each interrupt vector.
294 */
295 static int igb_enable_aim = TRUE;
296 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
297
298 /*
299  * MSIX should be the default for best performance,
300  * but this allows it to be forced off for testing.
301  */         
302 static int igb_enable_msix = 1;
303 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
304
305 /*
306  * Header split has proven beneficial in many
307  * circumstances tested; however, there have
308  * been some stability issues, so the default
309  * is off.
310  */
311 static bool igb_header_split = FALSE;
312 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
313
314 /*
315 ** This will autoconfigure based on
316 ** the number of CPUs if left at 0.
317 */
318 static int igb_num_queues = 0;
319 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
320
321 /* How many packets rxeof tries to clean at a time */
322 static int igb_rx_process_limit = 100;
323 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
324
325 /* Flow control setting - default to FULL */
326 static int igb_fc_setting = e1000_fc_full;
327 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
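
/*
 * For illustration, all of the tunables above can be set from
 * /boot/loader.conf before the driver attaches; the values below
 * are examples only, and the fc_setting line assumes the shared-code
 * enum where e1000_fc_full == 3:
 *
 *   hw.igb.rxd="2048"              # RX descriptors per ring
 *   hw.igb.txd="2048"              # TX descriptors per ring
 *   hw.igb.enable_aim="1"          # adaptive interrupt moderation
 *   hw.igb.enable_msix="1"         # use MSIX when available
 *   hw.igb.num_queues="0"          # 0 = autoconfigure from CPU count
 *   hw.igb.rx_process_limit="100"  # rxeof packets per pass
 *   hw.igb.fc_setting="3"          # full flow control
 */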
328
329 /*
330 ** Shadow VFTA table. This is needed because
331 ** the real filter table gets cleared during
332 ** a soft reset, and the driver must be able
333 ** to repopulate it.
334 */
335 static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
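
/*
 * A sketch of the usual e1000 VFTA layout this shadow mirrors:
 * IGB_VFTA_SIZE (128) 32-bit words cover all 4096 VLAN IDs, so a
 * given VLAN id selects word (vlan >> 5) & 0x7F and bit vlan & 0x1F;
 * VLAN 100, for example, lives in word 3, bit 4.
 */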
336
337
338 /*********************************************************************
339  *  Device identification routine
340  *
341  *  igb_probe determines if the driver should be loaded on an
342  *  adapter based on the PCI vendor/device ID of that adapter.
343  *
344  *  return BUS_PROBE_DEFAULT on success, positive on failure
345  *********************************************************************/
346
347 static int
348 igb_probe(device_t dev)
349 {
350         char            adapter_name[60];
351         uint16_t        pci_vendor_id = 0;
352         uint16_t        pci_device_id = 0;
353         uint16_t        pci_subvendor_id = 0;
354         uint16_t        pci_subdevice_id = 0;
355         igb_vendor_info_t *ent;
356
357         INIT_DEBUGOUT("igb_probe: begin");
358
359         pci_vendor_id = pci_get_vendor(dev);
360         if (pci_vendor_id != IGB_VENDOR_ID)
361                 return (ENXIO);
362
363         pci_device_id = pci_get_device(dev);
364         pci_subvendor_id = pci_get_subvendor(dev);
365         pci_subdevice_id = pci_get_subdevice(dev);
366
367         ent = igb_vendor_info_array;
368         while (ent->vendor_id != 0) {
369                 if ((pci_vendor_id == ent->vendor_id) &&
370                     (pci_device_id == ent->device_id) &&
371
372                     ((pci_subvendor_id == ent->subvendor_id) ||
373                     (ent->subvendor_id == PCI_ANY_ID)) &&
374
375                     ((pci_subdevice_id == ent->subdevice_id) ||
376                     (ent->subdevice_id == PCI_ANY_ID))) {
377                         sprintf(adapter_name, "%s %s",
378                                 igb_strings[ent->index],
379                                 igb_driver_version);
380                         device_set_desc_copy(dev, adapter_name);
381                         return (BUS_PROBE_DEFAULT);
382                 }
383                 ent++;
384         }
385
386         return (ENXIO);
387 }
388
389 /*********************************************************************
390  *  Device initialization routine
391  *
392  *  The attach entry point is called when the driver is being loaded.
393  *  This routine identifies the type of hardware, allocates all resources
394  *  and initializes the hardware.
395  *
396  *  return 0 on success, positive on failure
397  *********************************************************************/
398
399 static int
400 igb_attach(device_t dev)
401 {
402         struct adapter  *adapter;
403         int             error = 0;
404         u16             eeprom_data;
405
406         INIT_DEBUGOUT("igb_attach: begin");
407
408         adapter = device_get_softc(dev);
409         adapter->dev = adapter->osdep.dev = dev;
410         IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
411
412         /* SYSCTL stuff */
413         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
414             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
415             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
416             igb_sysctl_debug_info, "I", "Debug Information");
417
418         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
419             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
420             OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
421             igb_sysctl_stats, "I", "Statistics");
422
423         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
424             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
425             OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
426             &igb_fc_setting, 0, "Flow Control");
427
428         SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
429             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
430             OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
431             &igb_enable_aim, 1, "Interrupt Moderation");
432
433         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
434
435         /* Determine hardware and mac info */
436         igb_identify_hardware(adapter);
437
438         /* Setup PCI resources */
439         if (igb_allocate_pci_resources(adapter)) {
440                 device_printf(dev, "Allocation of PCI resources failed\n");
441                 error = ENXIO;
442                 goto err_pci;
443         }
444
445         /* Do Shared Code initialization */
446         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
447                 device_printf(dev, "Setup of Shared code failed\n");
448                 error = ENXIO;
449                 goto err_pci;
450         }
451
452         e1000_get_bus_info(&adapter->hw);
453
454         /* Sysctls for limiting the amount of work done in the taskqueue */
455         igb_add_rx_process_limit(adapter, "rx_processing_limit",
456             "max number of rx packets to process", &adapter->rx_process_limit,
457             igb_rx_process_limit);
458
459         /*
460          * Validate the number of transmit and receive descriptors.
461          * It must not exceed the hardware maximum and must be a
462          * multiple of IGB_DBA_ALIGN.
463          */
464         if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
465             (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
466                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
467                     IGB_DEFAULT_TXD, igb_txd);
468                 adapter->num_tx_desc = IGB_DEFAULT_TXD;
469         } else
470                 adapter->num_tx_desc = igb_txd;
471         if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
472             (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
473                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
474                     IGB_DEFAULT_RXD, igb_rxd);
475                 adapter->num_rx_desc = IGB_DEFAULT_RXD;
476         } else
477                 adapter->num_rx_desc = igb_rxd;
478
479         adapter->hw.mac.autoneg = DO_AUTO_NEG;
480         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
481         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
482
483         /* Copper options */
484         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
485                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
486                 adapter->hw.phy.disable_polarity_correction = FALSE;
487                 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
488         }
489
490         /*
491          * Set the frame limits assuming
492          * standard ethernet sized frames.
493          */
494         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
495         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
496
497         /*
498         ** Allocate and Setup Queues
499         */
500         if (igb_allocate_queues(adapter)) {
501                 error = ENOMEM;
502                 goto err_pci;
503         }
504
505         /*
506         ** Start from a known state; this is
507         ** important for reading the NVM and
508         ** MAC address afterwards.
509         */
510         e1000_reset_hw(&adapter->hw);
511
512         /* Make sure we have a good EEPROM before we read from it */
513         if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
514                 /*
515                 ** Some PCI-E parts fail the first check due to
516                 ** the link being in a sleep state; call it again.
517                 ** If it fails a second time, it's a real issue.
518                 */
519                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
520                         device_printf(dev,
521                             "The EEPROM Checksum Is Not Valid\n");
522                         error = EIO;
523                         goto err_late;
524                 }
525         }
526
527         /*
528         ** Copy the permanent MAC address out of the EEPROM
529         */
530         if (e1000_read_mac_addr(&adapter->hw) < 0) {
531                 device_printf(dev, "EEPROM read error while reading MAC"
532                     " address\n");
533                 error = EIO;
534                 goto err_late;
535         }
536         /* Check its sanity */
537         if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
538                 device_printf(dev, "Invalid MAC address\n");
539                 error = EIO;
540                 goto err_late;
541         }
542
543         /* 
544         ** Configure Interrupts
545         */
546         if ((adapter->msix > 1) && (igb_enable_msix))
547                 error = igb_allocate_msix(adapter);
548         else /* MSI or Legacy */
549                 error = igb_allocate_legacy(adapter);
550         if (error)
551                 goto err_late;
552
553         /* Setup OS specific network interface */
554         igb_setup_interface(dev, adapter);
555
556         /* Now get a good starting state */
557         igb_reset(adapter);
558
559         /* Initialize statistics */
560         igb_update_stats_counters(adapter);
561
562         adapter->hw.mac.get_link_status = 1;
563         igb_update_link_status(adapter);
564
565         /* Indicate SOL/IDER usage */
566         if (e1000_check_reset_block(&adapter->hw))
567                 device_printf(dev,
568                     "PHY reset is blocked due to SOL/IDER session.\n");
569
570         /* Determine if we have to control management hardware */
571         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
572
573         /*
574          * Setup Wake-on-Lan
575          */
576         /* APME bit in EEPROM is mapped to WUC.APME */
577         eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
578         if (eeprom_data)
579                 adapter->wol = E1000_WUFC_MAG;
580
581         /* Register for VLAN events */
582         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
583              igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
584         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
585              igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
586
587         /* Tell the stack that the interface is not active */
588         adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
589
590         adapter->led_dev = led_create(igb_led_func, adapter,
591             device_get_nameunit(dev));
592
593         INIT_DEBUGOUT("igb_attach: end");
594
595         return (0);
596
597 err_late:
598         igb_free_transmit_structures(adapter);
599         igb_free_receive_structures(adapter);
600         igb_release_hw_control(adapter);
601 err_pci:
602         igb_free_pci_resources(adapter);
603         IGB_CORE_LOCK_DESTROY(adapter);
604
605         return (error);
606 }
607
608 /*********************************************************************
609  *  Device removal routine
610  *
611  *  The detach entry point is called when the driver is being removed.
612  *  This routine stops the adapter and deallocates all the resources
613  *  that were allocated for driver operation.
614  *
615  *  return 0 on success, positive on failure
616  *********************************************************************/
617
618 static int
619 igb_detach(device_t dev)
620 {
621         struct adapter  *adapter = device_get_softc(dev);
622         struct ifnet    *ifp = adapter->ifp;
623
624         INIT_DEBUGOUT("igb_detach: begin");
625
626         /* Make sure VLANs are not using the driver */
627         if (adapter->ifp->if_vlantrunk != NULL) {
628                 device_printf(dev,"Vlan in use, detach first\n");
629                 return (EBUSY);
630         }
631
632         if (adapter->led_dev != NULL)
633                 led_destroy(adapter->led_dev);
634
635 #ifdef DEVICE_POLLING
636         if (ifp->if_capenable & IFCAP_POLLING)
637                 ether_poll_deregister(ifp);
638 #endif
639
640         IGB_CORE_LOCK(adapter);
641         adapter->in_detach = 1;
642         igb_stop(adapter);
643         IGB_CORE_UNLOCK(adapter);
644
645         e1000_phy_hw_reset(&adapter->hw);
646
647         /* Give control back to firmware */
648         igb_release_manageability(adapter);
649         igb_release_hw_control(adapter);
650
651         if (adapter->wol) {
652                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
653                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
654                 igb_enable_wakeup(dev);
655         }
656
657         /* Unregister VLAN events */
658         if (adapter->vlan_attach != NULL)
659                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
660         if (adapter->vlan_detach != NULL)
661                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
662
663         ether_ifdetach(adapter->ifp);
664
665         callout_drain(&adapter->timer);
666
667         igb_free_pci_resources(adapter);
668         bus_generic_detach(dev);
669         if_free(ifp);
670
671         igb_free_transmit_structures(adapter);
672         igb_free_receive_structures(adapter);
673
674         IGB_CORE_LOCK_DESTROY(adapter);
675
676         return (0);
677 }
678
679 /*********************************************************************
680  *
681  *  Shutdown entry point
682  *
683  **********************************************************************/
684
685 static int
686 igb_shutdown(device_t dev)
687 {
688         return igb_suspend(dev);
689 }
690
691 /*
692  * Suspend/resume device methods.
693  */
694 static int
695 igb_suspend(device_t dev)
696 {
697         struct adapter *adapter = device_get_softc(dev);
698
699         IGB_CORE_LOCK(adapter);
700
701         igb_stop(adapter);
702
703         igb_release_manageability(adapter);
704         igb_release_hw_control(adapter);
705
706         if (adapter->wol) {
707                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
708                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
709                 igb_enable_wakeup(dev);
710         }
711
712         IGB_CORE_UNLOCK(adapter);
713
714         return bus_generic_suspend(dev);
715 }
716
717 static int
718 igb_resume(device_t dev)
719 {
720         struct adapter *adapter = device_get_softc(dev);
721         struct ifnet *ifp = adapter->ifp;
722
723         IGB_CORE_LOCK(adapter);
724         igb_init_locked(adapter);
725         igb_init_manageability(adapter);
726
727         if ((ifp->if_flags & IFF_UP) &&
728             (ifp->if_drv_flags & IFF_DRV_RUNNING))
729                 igb_start(ifp);
730
731         IGB_CORE_UNLOCK(adapter);
732
733         return bus_generic_resume(dev);
734 }
735
736
737 /*********************************************************************
738  *  Transmit entry point
739  *
740  *  igb_start is called by the stack to initiate a transmit.
741  *  The driver will remain in this routine as long as there are
742  *  packets to transmit and transmit resources are available.
743  *  If resources are not available, the stack is notified
744  *  and the packet is requeued.
745  **********************************************************************/
746
747 static void
748 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
749 {
750         struct adapter  *adapter = ifp->if_softc;
751         struct mbuf     *m_head;
752
753         IGB_TX_LOCK_ASSERT(txr);
754
755         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
756             IFF_DRV_RUNNING)
757                 return;
758         if (!adapter->link_active)
759                 return;
760
761         /* Call cleanup if number of TX descriptors low */
762         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
763                 igb_txeof(txr);
764
765         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
766                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
767                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
768                         break;
769                 }
770                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
771                 if (m_head == NULL)
772                         break;
773                 /*
774                  *  Encapsulation can modify our pointer, and/or make it
775                  *  NULL on failure.  In that event, we can't requeue.
776                  */
777                 if (igb_xmit(txr, &m_head)) {
778                         if (m_head == NULL)
779                                 break;
780                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
781                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
782                         break;
783                 }
784
785                 /* Send a copy of the frame to the BPF listener */
786                 ETHER_BPF_MTAP(ifp, m_head);
787
788                 /* Set watchdog on */
789                 txr->watchdog_time = ticks;
790                 txr->watchdog_check = TRUE;
791         }
792 }
793  
794 /*
795  * Legacy TX driver routine, called from the
796  * stack; it always uses the first TX ring and
797  * spins on its lock.  Not for use with multiqueue TX.
798  */
799 static void
800 igb_start(struct ifnet *ifp)
801 {
802         struct adapter  *adapter = ifp->if_softc;
803         struct tx_ring  *txr = adapter->tx_rings;
804
805         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
806                 IGB_TX_LOCK(txr);
807                 igb_start_locked(txr, ifp);
808                 IGB_TX_UNLOCK(txr);
809         }
810         return;
811 }
812
813 #if __FreeBSD_version >= 800000
814 /*
815 ** Multiqueue Transmit driver: selects a TX
816 ** ring by flow id and tries its lock.
817 */
818 static int
819 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
820 {
821         struct adapter  *adapter = ifp->if_softc;
822         struct tx_ring  *txr;
823         int             i = 0, err = 0;
824
825         /* Which queue to use */
826         if ((m->m_flags & M_FLOWID) != 0)
827                 i = m->m_pkthdr.flowid % adapter->num_queues;
828
829         txr = &adapter->tx_rings[i];
830
831         if (IGB_TX_TRYLOCK(txr)) {
832                 err = igb_mq_start_locked(ifp, txr, m);
833                 IGB_TX_UNLOCK(txr);
834         } else
835                 err = drbr_enqueue(ifp, txr->br, m);
836
837         return (err);
838 }
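
/*
 * Queue selection sketch for the routine above: with a flow-tagged
 * mbuf and, say, four queues, flowid 7 maps to ring 7 % 4 = 3, so
 * all packets of a flow stay on one ring, while untagged traffic
 * defaults to ring 0.  If the ring lock is busy, the frame is
 * enqueued to the ring's buf_ring rather than blocking.
 */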
839
840 static int
841 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
842 {
843         struct adapter  *adapter = txr->adapter;
844         struct mbuf     *next;
845         int             err = 0, enq;
846
847         IGB_TX_LOCK_ASSERT(txr);
848
849         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
850             IFF_DRV_RUNNING || adapter->link_active == 0) {
851                 if (m != NULL)
852                         err = drbr_enqueue(ifp, txr->br, m);
853                 return (err);
854         }
855
856         /* Call cleanup if number of TX descriptors low */
857         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
858                 igb_txeof(txr);
859
860         enq = 0;
861         if (m == NULL) {
862                 next = drbr_dequeue(ifp, txr->br);
863         } else if (drbr_needs_enqueue(ifp, txr->br)) {
864                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
865                         return (err);
866                 next = drbr_dequeue(ifp, txr->br);
867         } else
868                 next = m;
869
870         /* Process the queue */
871         while (next != NULL) {
872                 if ((err = igb_xmit(txr, &next)) != 0) {
873                         if (next != NULL)
874                                 err = drbr_enqueue(ifp, txr->br, next);
875                         break;
876                 }
877                 enq++;
878                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
879                 ETHER_BPF_MTAP(ifp, next);
880                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
881                         break;
882                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
883                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
884                         break;
885                 }
886                 next = drbr_dequeue(ifp, txr->br);
887         }
888         if (enq > 0) {
889                 /* Set the watchdog */
890                 txr->watchdog_check = TRUE;
891                 txr->watchdog_time = ticks;
892         }
893         return (err);
894 }
895
896 /*
897 ** Flush all ring buffers
898 */
899 static void
900 igb_qflush(struct ifnet *ifp)
901 {
902         struct adapter  *adapter = ifp->if_softc;
903         struct tx_ring  *txr = adapter->tx_rings;
904         struct mbuf     *m;
905
906         for (int i = 0; i < adapter->num_queues; i++, txr++) {
907                 IGB_TX_LOCK(txr);
908                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
909                         m_freem(m);
910                 IGB_TX_UNLOCK(txr);
911         }
912         if_qflush(ifp);
913 }
914 #endif /* __FreeBSD_version >= 800000 */
915
916 /*********************************************************************
917  *  Ioctl entry point
918  *
919  *  igb_ioctl is called when the user wants to configure the
920  *  interface.
921  *
922  *  return 0 on success, positive on failure
923  **********************************************************************/
924
925 static int
926 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
927 {
928         struct adapter  *adapter = ifp->if_softc;
929         struct ifreq *ifr = (struct ifreq *)data;
930 #ifdef INET
931         struct ifaddr *ifa = (struct ifaddr *)data;
932 #endif
933         int error = 0;
934
935         if (adapter->in_detach)
936                 return (error);
937
938         switch (command) {
939         case SIOCSIFADDR:
940 #ifdef INET
941                 if (ifa->ifa_addr->sa_family == AF_INET) {
942                         /*
943                          * XXX
944                          * Since resetting hardware takes a very long time
945                          * and results in link renegotiation, we
946                          * initialize the hardware only when it is absolutely
947                          * required.
948                          */
949                         ifp->if_flags |= IFF_UP;
950                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
951                                 IGB_CORE_LOCK(adapter);
952                                 igb_init_locked(adapter);
953                                 IGB_CORE_UNLOCK(adapter);
954                         }
955                         if (!(ifp->if_flags & IFF_NOARP))
956                                 arp_ifinit(ifp, ifa);
957                 } else
958 #endif
959                         error = ether_ioctl(ifp, command, data);
960                 break;
961         case SIOCSIFMTU:
962             {
963                 int max_frame_size;
964
965                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
966
967                 IGB_CORE_LOCK(adapter);
968                 max_frame_size = 9234;
969                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
970                     ETHER_CRC_LEN) {
971                         IGB_CORE_UNLOCK(adapter);
972                         error = EINVAL;
973                         break;
974                 }
975
976                 ifp->if_mtu = ifr->ifr_mtu;
977                 adapter->max_frame_size =
978                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
979                 igb_init_locked(adapter);
980                 IGB_CORE_UNLOCK(adapter);
981                 break;
982             }
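
        /*
         * With the 9234-byte hardware frame limit above and the
         * standard 14-byte Ethernet header plus 4-byte CRC, the
         * largest accepted MTU works out to 9234 - 14 - 4 = 9216.
         */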
983         case SIOCSIFFLAGS:
984                 IOCTL_DEBUGOUT(
985                     "ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
986                 IGB_CORE_LOCK(adapter);
987                 if (ifp->if_flags & IFF_UP) {
988                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
989                                 if ((ifp->if_flags ^ adapter->if_flags) &
990                                     (IFF_PROMISC | IFF_ALLMULTI)) {
991                                         igb_disable_promisc(adapter);
992                                         igb_set_promisc(adapter);
993                                 }
994                         } else
995                                 igb_init_locked(adapter);
996                 } else
997                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
998                                 igb_stop(adapter);
999                 adapter->if_flags = ifp->if_flags;
1000                 IGB_CORE_UNLOCK(adapter);
1001                 break;
1002         case SIOCADDMULTI:
1003         case SIOCDELMULTI:
1004                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1005                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1006                         IGB_CORE_LOCK(adapter);
1007                         igb_disable_intr(adapter);
1008                         igb_set_multi(adapter);
1009 #ifdef DEVICE_POLLING
1010                         if (!(ifp->if_capenable & IFCAP_POLLING))
1011 #endif
1012                                 igb_enable_intr(adapter);
1013                         IGB_CORE_UNLOCK(adapter);
1014                 }
1015                 break;
1016         case SIOCSIFMEDIA:
1017                 /* Check SOL/IDER usage */
1018                 IGB_CORE_LOCK(adapter);
1019                 if (e1000_check_reset_block(&adapter->hw)) {
1020                         IGB_CORE_UNLOCK(adapter);
1021                         device_printf(adapter->dev, "Media change is"
1022                             " blocked due to SOL/IDER session.\n");
1023                         break;
1024                 }
1025                 IGB_CORE_UNLOCK(adapter);
1026         case SIOCGIFMEDIA:
1027                 IOCTL_DEBUGOUT(
1028                     "ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
1029                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1030                 break;
1031         case SIOCSIFCAP:
1032             {
1033                 int mask, reinit;
1034
1035                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1036                 reinit = 0;
1037                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1038 #ifdef DEVICE_POLLING
1039                 if (mask & IFCAP_POLLING) {
1040                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1041                                 error = ether_poll_register(igb_poll, ifp);
1042                                 if (error)
1043                                         return (error);
1044                                 IGB_CORE_LOCK(adapter);
1045                                 igb_disable_intr(adapter);
1046                                 ifp->if_capenable |= IFCAP_POLLING;
1047                                 IGB_CORE_UNLOCK(adapter);
1048                         } else {
1049                                 error = ether_poll_deregister(ifp);
1050                                 /* Enable interrupt even in error case */
1051                                 IGB_CORE_LOCK(adapter);
1052                                 igb_enable_intr(adapter);
1053                                 ifp->if_capenable &= ~IFCAP_POLLING;
1054                                 IGB_CORE_UNLOCK(adapter);
1055                         }
1056                 }
1057 #endif
1058                 if (mask & IFCAP_HWCSUM) {
1059                         ifp->if_capenable ^= IFCAP_HWCSUM;
1060                         reinit = 1;
1061                 }
1062                 if (mask & IFCAP_TSO4) {
1063                         ifp->if_capenable ^= IFCAP_TSO4;
1064                         reinit = 1;
1065                 }
1066                 if (mask & IFCAP_VLAN_HWTAGGING) {
1067                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1068                         reinit = 1;
1069                 }
1070                 if (mask & IFCAP_VLAN_HWFILTER) {
1071                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1072                         reinit = 1;
1073                 }
1074                 if (mask & IFCAP_LRO) {
1075                         ifp->if_capenable ^= IFCAP_LRO;
1076                         reinit = 1;
1077                 }
1078                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1079                         igb_init(adapter);
1080                 VLAN_CAPABILITIES(ifp);
1081                 break;
1082             }
1083
1084         default:
1085                 error = ether_ioctl(ifp, command, data);
1086                 break;
1087         }
1088
1089         return (error);
1090 }
1091
1092
1093 /*********************************************************************
1094  *  Init entry point
1095  *
1096  *  This routine is used in two ways. It is used by the stack as
1097  *  init entry point in network interface structure. It is also used
1098  *  by the driver as a hw/sw initialization routine to get to a
1099  *  consistent state.
1100  *
1102  **********************************************************************/
1103
1104 static void
1105 igb_init_locked(struct adapter *adapter)
1106 {
1107         struct ifnet    *ifp = adapter->ifp;
1108         device_t        dev = adapter->dev;
1109
1110         INIT_DEBUGOUT("igb_init: begin");
1111
1112         IGB_CORE_LOCK_ASSERT(adapter);
1113
1114         igb_disable_intr(adapter);
1115         callout_stop(&adapter->timer);
1116
1117         /* Get the latest MAC address; the user may have set a LAA */
1118         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1119               ETHER_ADDR_LEN);
1120
1121         /* Put the address into the Receive Address Array */
1122         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1123
1124         igb_reset(adapter);
1125         igb_update_link_status(adapter);
1126
1127         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1128
1129         /* Use real VLAN Filter support? */
1130         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1131                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1132                         /* Use real VLAN Filter support */
1133                         igb_setup_vlan_hw_support(adapter);
1134                 else {
1135                         u32 ctrl;
1136                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1137                         ctrl |= E1000_CTRL_VME;
1138                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1139                 }
1140         }
1141                                 
1142         /* Set hardware offload abilities */
1143         ifp->if_hwassist = 0;
1144         if (ifp->if_capenable & IFCAP_TXCSUM) {
1145                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1146 #if __FreeBSD_version >= 800000
1147                 if (adapter->hw.mac.type == e1000_82576)
1148                         ifp->if_hwassist |= CSUM_SCTP;
1149 #endif
1150         }
1151
1152         if (ifp->if_capenable & IFCAP_TSO4)
1153                 ifp->if_hwassist |= CSUM_TSO;
1154
1155         /* Configure for OS presence */
1156         igb_init_manageability(adapter);
1157
1158         /* Prepare transmit descriptors and buffers */
1159         igb_setup_transmit_structures(adapter);
1160         igb_initialize_transmit_units(adapter);
1161
1162         /* Setup Multicast table */
1163         igb_set_multi(adapter);
1164
1165         /*
1166         ** Figure out the desired mbuf pool
1167         ** for jumbo/packet-split receives.
1168         */
1169         if (ifp->if_mtu > ETHERMTU)
1170                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1171         else
1172                 adapter->rx_mbuf_sz = MCLBYTES;
1173
1174         /* Prepare receive descriptors and buffers */
1175         if (igb_setup_receive_structures(adapter)) {
1176                 device_printf(dev, "Could not setup receive structures\n");
1177                 return;
1178         }
1179         igb_initialize_receive_units(adapter);
1180
1181         /* Don't lose promiscuous settings */
1182         igb_set_promisc(adapter);
1183
1184         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1185         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1186
1187         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1188         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1189
1190         if (adapter->msix > 1) /* Set up queue routing */
1191                 igb_configure_queues(adapter);
1192
1193         /* Set up VLAN tag offload and filter */
1194         igb_setup_vlan_hw_support(adapter);
1195
1196         /* this clears any pending interrupts */
1197         E1000_READ_REG(&adapter->hw, E1000_ICR);
1198 #ifdef DEVICE_POLLING
1199         /*
1200          * Only enable interrupts if we are not polling, make sure
1201          * they are off otherwise.
1202          */
1203         if (ifp->if_capenable & IFCAP_POLLING)
1204                 igb_disable_intr(adapter);
1205         else
1206 #endif /* DEVICE_POLLING */
1207         {
1208         igb_enable_intr(adapter);
1209         E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1210         }
1211
1212         /* Don't reset the phy next time init gets called */
1213         adapter->hw.phy.reset_disable = TRUE;
1214 }
1215
1216 static void
1217 igb_init(void *arg)
1218 {
1219         struct adapter *adapter = arg;
1220
1221         IGB_CORE_LOCK(adapter);
1222         igb_init_locked(adapter);
1223         IGB_CORE_UNLOCK(adapter);
1224 }
1225
1226
1227 static void
1228 igb_handle_rxtx(void *context, int pending)
1229 {
1230         struct igb_queue        *que = context;
1231         struct adapter          *adapter = que->adapter;
1232         struct tx_ring          *txr = adapter->tx_rings;
1233         struct ifnet            *ifp;
1234
1235         ifp = adapter->ifp;
1236
1237         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1238                 if (igb_rxeof(que, adapter->rx_process_limit))
1239                         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1240                 IGB_TX_LOCK(txr);
1241                 igb_txeof(txr);
1242
1243 #if __FreeBSD_version >= 800000
1244                 if (!drbr_empty(ifp, txr->br))
1245                         igb_mq_start_locked(ifp, txr, NULL);
1246 #else
1247                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1248                         igb_start_locked(txr, ifp);
1249 #endif
1250                 IGB_TX_UNLOCK(txr);
1251         }
1252
1253         igb_enable_intr(adapter);
1254 }
1255
1256 static void
1257 igb_handle_que(void *context, int pending)
1258 {
1259         struct igb_queue *que = context;
1260         struct adapter *adapter = que->adapter;
1261         struct tx_ring *txr = que->txr;
1262         struct ifnet    *ifp = adapter->ifp;
1263         bool            more;
1264
1265         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1266                 more = igb_rxeof(que, -1);
1267
1268                 IGB_TX_LOCK(txr);
1269                 igb_txeof(txr);
1270 #if __FreeBSD_version >= 800000
1271                 igb_mq_start_locked(ifp, txr, NULL);
1272 #else
1273                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1274                         igb_start_locked(txr, ifp);
1275 #endif
1276                 IGB_TX_UNLOCK(txr);
1277                 if (more) {
1278                         taskqueue_enqueue(que->tq, &que->que_task);
1279                         return;
1280                 }
1281         }
1282
1283         /* Reenable this interrupt */
1284 #ifdef DEVICE_POLLING
1285         if (!(ifp->if_capenable & IFCAP_POLLING))
1286 #endif
1287         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1288 }
1289
1290 /* Deal with link in a sleepable context */
1291 static void
1292 igb_handle_link(void *context, int pending)
1293 {
1294         struct adapter *adapter = context;
1295
1296         adapter->hw.mac.get_link_status = 1;
1297         igb_update_link_status(adapter);
1298 }
1299
1300 /*********************************************************************
1301  *
1302  *  MSI/Legacy Interrupt Service routine: a fast filter
1303  *  that defers the real work to a taskqueue.
1304  *
1305  *********************************************************************/
1306 static int
1307 igb_irq_fast(void *arg)
1308 {
1309         struct adapter  *adapter = arg;
1310         uint32_t        reg_icr;
1311
1312
1313         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1314
1315         /* Hot eject?  */
1316         if (reg_icr == 0xffffffff)
1317                 return FILTER_STRAY;
1318
1319         /* Definitely not our interrupt.  */
1320         if (reg_icr == 0x0)
1321                 return FILTER_STRAY;
1322
1323         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1324                 return FILTER_STRAY;
1325
1326         /*
1327          * Mask interrupts until the taskqueue is finished running.  This is
1328          * cheap, just assume that it is needed.  This also works around the
1329          * MSI message reordering errata on certain systems.
1330          */
1331         igb_disable_intr(adapter);
1332         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1333
1334         /* Link status change */
1335         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1336                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1337
1338         if (reg_icr & E1000_ICR_RXO)
1339                 adapter->rx_overruns++;
1340         return FILTER_HANDLED;
1341 }
1342
1343 #ifdef DEVICE_POLLING
1344 /*********************************************************************
1345  *
1346  *  Legacy polling routine: if you use this code you MUST be sure
1347  *  that multiqueue is not in effect, i.e., set igb_num_queues to 1.
1348  *
1349  *********************************************************************/
1350 #if __FreeBSD_version >= 800000
1351 #define POLL_RETURN_COUNT(a) (a)
1352 static int
1353 #else
1354 #define POLL_RETURN_COUNT(a)
1355 static void
1356 #endif
1357 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1358 {
1359         struct adapter          *adapter = ifp->if_softc;
1360         struct igb_queue        *que = adapter->queues;
1361         struct tx_ring          *txr = adapter->tx_rings;
1362         u32                     reg_icr, rx_done = 0;
1363         u32                     loop = IGB_MAX_LOOP;
1364         bool                    more;
1365
1366         IGB_CORE_LOCK(adapter);
1367         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1368                 IGB_CORE_UNLOCK(adapter);
1369                 return POLL_RETURN_COUNT(rx_done);
1370         }
1371
1372         if (cmd == POLL_AND_CHECK_STATUS) {
1373                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1374                 /* Link status change */
1375                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1376                         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1377
1378                 if (reg_icr & E1000_ICR_RXO)
1379                         adapter->rx_overruns++;
1380         }
1381         IGB_CORE_UNLOCK(adapter);
1382
1383         /* TODO: rx_count */
1384         rx_done = igb_rxeof(que, count) ? 1 : 0;
1385
1386         IGB_TX_LOCK(txr);
1387         do {
1388                 more = igb_txeof(txr);
1389         } while (loop-- && more);
1390 #if __FreeBSD_version >= 800000
1391         if (!drbr_empty(ifp, txr->br))
1392                 igb_mq_start_locked(ifp, txr, NULL);
1393 #else
1394         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1395                 igb_start_locked(txr, ifp);
1396 #endif
1397         IGB_TX_UNLOCK(txr);
1398         return POLL_RETURN_COUNT(rx_done);
1399 }
1400 #endif /* DEVICE_POLLING */
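
/*
 * Polling usage sketch: igb_poll is compiled in only when the kernel
 * is built with "options DEVICE_POLLING".  It is then enabled per
 * interface, e.g. "ifconfig igb0 polling"; the SIOCSIFCAP handler in
 * igb_ioctl registers or deregisters the poll handler accordingly.
 */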
1401
1402 /*********************************************************************
1403  *
1404  *  MSIX Queue Interrupt Service routine (TX and RX for one queue)
1405  *
1406  **********************************************************************/
1407 static void
1408 igb_msix_que(void *arg)
1409 {
1410         struct igb_queue *que = arg;
1411         struct adapter *adapter = que->adapter;
1412         struct tx_ring *txr = que->txr;
1413         struct rx_ring *rxr = que->rxr;
1414         u32             newitr = 0;
1415         bool            more_tx, more_rx;
1416
1417         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1418         ++que->irqs;
1419
1420         IGB_TX_LOCK(txr);
1421         more_tx = igb_txeof(txr);
1422         IGB_TX_UNLOCK(txr);
1423
1424         more_rx = igb_rxeof(que, adapter->rx_process_limit);
1425
1426         if (igb_enable_aim == FALSE)
1427                 goto no_calc;
1428         /*
1429         ** Do Adaptive Interrupt Moderation:
1430         **  - Write out last calculated setting
1431         **  - Calculate based on average size over
1432         **    the last interval.
1433         */
1434         if (que->eitr_setting)
1435                 E1000_WRITE_REG(&adapter->hw,
1436                     E1000_EITR(que->msix), que->eitr_setting);
1437  
1438         que->eitr_setting = 0;
1439
1440         /* Idle, do nothing */
1441         if ((txr->bytes == 0) && (rxr->bytes == 0))
1442                 goto no_calc;
1443                                 
1444         /* Use half the default if link is below gigabit speed */
1445         if (adapter->link_speed != 1000)
1446                 newitr = IGB_DEFAULT_ITR / 2;
1447         else {
1448                 if ((txr->bytes) && (txr->packets))
1449                         newitr = txr->bytes/txr->packets;
1450                 if ((rxr->bytes) && (rxr->packets))
1451                         newitr = max(newitr,
1452                             (rxr->bytes / rxr->packets));
1453                 newitr += 24; /* account for hardware frame, crc */
1454                 /* set an upper boundary */
1455                 newitr = min(newitr, 3000);
1456                 /* Be nice to the mid range */
1457                 if ((newitr > 300) && (newitr < 1200))
1458                         newitr = (newitr / 3);
1459                 else
1460                         newitr = (newitr / 2);
1461         }
1462         newitr &= 0x7FFC;  /* Mask invalid bits */
1463         if (adapter->hw.mac.type == e1000_82575)
1464                 newitr |= newitr << 16;
1465         else
1466                 newitr |= E1000_EITR_CNT_IGNR;
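        /*
        ** Worked example with hypothetical gigabit counts: txr->bytes =
        ** 300000 and txr->packets = 200 give an average frame of 1500
        ** bytes; adding 24 yields 1524, which is outside the 300-1200
        ** mid range, so it is halved to 762 and masked with 0x7FFC to
        ** 760 before the bits above are OR'd in.
        */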
1467                  
1468         /* save for next interrupt */
1469         que->eitr_setting = newitr;
1470
1471         /* Reset state */
1472         txr->bytes = 0;
1473         txr->packets = 0;
1474         rxr->bytes = 0;
1475         rxr->packets = 0;
1476
1477 no_calc:
1478         /* Schedule a clean task if needed */
1479         if (more_tx || more_rx) 
1480                 taskqueue_enqueue(que->tq, &que->que_task);
1481         else
1482                 /* Reenable this interrupt */
1483                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1484         return;
1485 }
1486
1487
1488 /*********************************************************************
1489  *
1490  *  MSIX Link Interrupt Service routine
1491  *
1492  **********************************************************************/
1493
1494 static void
1495 igb_msix_link(void *arg)
1496 {
1497         struct adapter  *adapter = arg;
1498         u32             icr;
1499
1500         ++adapter->link_irq;
1501         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1502         if (!(icr & E1000_ICR_LSC))
1503                 goto spurious;
1504         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1505
1506 spurious:
1507         /* Rearm */
1508         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1509         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1510         return;
1511 }
1512
1513
1514 /*********************************************************************
1515  *
1516  *  Media Ioctl callback
1517  *
1518  *  This routine is called whenever the user queries the status of
1519  *  the interface using ifconfig.
1520  *
1521  **********************************************************************/
1522 static void
1523 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1524 {
1525         struct adapter *adapter = ifp->if_softc;
1526         u_char fiber_type = IFM_1000_SX;
1527
1528         INIT_DEBUGOUT("igb_media_status: begin");
1529
1530         IGB_CORE_LOCK(adapter);
1531         igb_update_link_status(adapter);
1532
1533         ifmr->ifm_status = IFM_AVALID;
1534         ifmr->ifm_active = IFM_ETHER;
1535
1536         if (!adapter->link_active) {
1537                 IGB_CORE_UNLOCK(adapter);
1538                 return;
1539         }
1540
1541         ifmr->ifm_status |= IFM_ACTIVE;
1542
1543         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1544             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1545                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1546         else {
1547                 switch (adapter->link_speed) {
1548                 case 10:
1549                         ifmr->ifm_active |= IFM_10_T;
1550                         break;
1551                 case 100:
1552                         ifmr->ifm_active |= IFM_100_TX;
1553                         break;
1554                 case 1000:
1555                         ifmr->ifm_active |= IFM_1000_T;
1556                         break;
1557                 }
1558                 if (adapter->link_duplex == FULL_DUPLEX)
1559                         ifmr->ifm_active |= IFM_FDX;
1560                 else
1561                         ifmr->ifm_active |= IFM_HDX;
1562         }
1563         IGB_CORE_UNLOCK(adapter);
1564 }
1565
1566 /*********************************************************************
1567  *
1568  *  Media Ioctl callback
1569  *
1570  *  This routine is called when the user changes speed/duplex using
1571  *  the media/mediaopt options with ifconfig.
1572  *
1573  **********************************************************************/
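/*
 * Illustrative userland usage (assumed device name, not driver code):
 *
 *   ifconfig igb0 media 100baseTX mediaopt full-duplex   # force 100/full
 *   ifconfig igb0 media autoselect                       # restore autoneg
 */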
1574 static int
1575 igb_media_change(struct ifnet *ifp)
1576 {
1577         struct adapter *adapter = ifp->if_softc;
1578         struct ifmedia  *ifm = &adapter->media;
1579
1580         INIT_DEBUGOUT("igb_media_change: begin");
1581
1582         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1583                 return (EINVAL);
1584
1585         IGB_CORE_LOCK(adapter);
1586         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1587         case IFM_AUTO:
1588                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1589                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1590                 break;
1591         case IFM_1000_LX:
1592         case IFM_1000_SX:
1593         case IFM_1000_T:
1594                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1595                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1596                 break;
1597         case IFM_100_TX:
1598                 adapter->hw.mac.autoneg = FALSE;
1599                 adapter->hw.phy.autoneg_advertised = 0;
1600                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1601                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1602                 else
1603                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1604                 break;
1605         case IFM_10_T:
1606                 adapter->hw.mac.autoneg = FALSE;
1607                 adapter->hw.phy.autoneg_advertised = 0;
1608                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1609                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1610                 else
1611                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1612                 break;
1613         default:
1614                 device_printf(adapter->dev, "Unsupported media type\n");
1615         }
1616
1617         /* As the speed/duplex settings may have changed, we need to
1618          * reset the PHY.
1619          */
1620         adapter->hw.phy.reset_disable = FALSE;
1621
1622         igb_init_locked(adapter);
1623         IGB_CORE_UNLOCK(adapter);
1624
1625         return (0);
1626 }
1627
1628
1629 /*********************************************************************
1630  *
1631  *  This routine maps the mbufs to Advanced TX descriptors,
1632  *  as used by the 82575-family adapters.
1633  *  
1634  **********************************************************************/
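/*
 * Rough sketch of the advanced TX descriptor "read" layout filled in
 * below (field widths per the 82575-family datasheets):
 *
 *    63                              32 31                              0
 *   +----------------------------------------------------------------+
 *   |                          buffer_addr                            |
 *   +--------------------------------+-------------------------------+
 *   |         olinfo_status          |         cmd_type_len          |
 *   +--------------------------------+-------------------------------+
 *
 * cmd_type_len carries the DTYP/DCMD flags plus the segment length;
 * olinfo_status carries the payload length and the POPTS offload bits.
 */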
1635
1636 static int
1637 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1638 {
1639         struct adapter          *adapter = txr->adapter;
1640         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1641         bus_dmamap_t            map;
1642         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1643         union e1000_adv_tx_desc *txd = NULL;
1644         struct mbuf             *m_head;
1645         u32                     olinfo_status = 0, cmd_type_len = 0;
1646         int                     nsegs, i, j, error, first, last = 0;
1647         u32                     hdrlen = 0;
1648
1649         m_head = *m_headp;
1650
1651
1652         /* Set basic descriptor constants */
1653         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1654         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1655         if (m_head->m_flags & M_VLANTAG)
1656                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1657
1658         /*
1659          * Force a cleanup if number of TX descriptors
1660          * available hits the threshold
1661          */
1662         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1663                 igb_txeof(txr);
1664                 /* Do we now have at least the minimum? */
1665                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1666                         txr->no_desc_avail++;
1667                         return (ENOBUFS);
1668                 }
1669         }
1670
1671         /*
1672          * Map the packet for DMA.
1673          *
1674          * Capture the first descriptor index,
1675          * this descriptor will have the index
1676          * of the EOP which is the only one that
1677          * now gets a DONE bit writeback.
1678          */
1679         first = txr->next_avail_desc;
1680         tx_buffer = &txr->tx_buffers[first];
1681         tx_buffer_mapped = tx_buffer;
1682         map = tx_buffer->map;
1683
1684         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1685             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1686
1687         if (error == EFBIG) {
1688                 struct mbuf *m;
1689
1690                 m = m_defrag(*m_headp, M_DONTWAIT);
1691                 if (m == NULL) {
1692                         adapter->mbuf_defrag_failed++;
1693                         m_freem(*m_headp);
1694                         *m_headp = NULL;
1695                         return (ENOBUFS);
1696                 }
1697                 *m_headp = m;
1698
1699                 /* Try it again */
1700                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1701                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1702
1703                 if (error == ENOMEM) {
1704                         adapter->no_tx_dma_setup++;
1705                         return (error);
1706                 } else if (error != 0) {
1707                         adapter->no_tx_dma_setup++;
1708                         m_freem(*m_headp);
1709                         *m_headp = NULL;
1710                         return (error);
1711                 }
1712         } else if (error == ENOMEM) {
1713                 adapter->no_tx_dma_setup++;
1714                 return (error);
1715         } else if (error != 0) {
1716                 adapter->no_tx_dma_setup++;
1717                 m_freem(*m_headp);
1718                 *m_headp = NULL;
1719                 return (error);
1720         }
1721
1722         /* Check again to be sure we have enough descriptors */
1723         if (nsegs > (txr->tx_avail - 2)) {
1724                 txr->no_desc_avail++;
1725                 bus_dmamap_unload(txr->txtag, map);
1726                 return (ENOBUFS);
1727         }
1728         m_head = *m_headp;
1729
1730         /*
1731          * Set up the context descriptor:
1732          * used when any hardware offload is done.
1733          * This includes CSUM, VLAN, and TSO. It
1734          * will use the first descriptor.
1735          */
1736         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1737                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1738                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1739                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1740                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1741                 } else
1742                         return (ENXIO); 
1743         } else if (igb_tx_ctx_setup(txr, m_head))
1744                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1745
1746         /* Calculate payload length */
1747         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1748             << E1000_ADVTXD_PAYLEN_SHIFT);
1749
1750         /* 82575 needs the queue index added */
1751         if (adapter->hw.mac.type == e1000_82575)
1752                 olinfo_status |= txr->me << 4;
1753
1754         /* Set up our transmit descriptors */
1755         i = txr->next_avail_desc;
1756         for (j = 0; j < nsegs; j++) {
1757                 bus_size_t seg_len;
1758                 bus_addr_t seg_addr;
1759
1760                 tx_buffer = &txr->tx_buffers[i];
1761                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1762                 seg_addr = segs[j].ds_addr;
1763                 seg_len  = segs[j].ds_len;
1764
1765                 txd->read.buffer_addr = htole64(seg_addr);
1766                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1767                 txd->read.olinfo_status = htole32(olinfo_status);
1768                 last = i;
1769                 if (++i == adapter->num_tx_desc)
1770                         i = 0;
1771                 tx_buffer->m_head = NULL;
1772                 tx_buffer->next_eop = -1;
1773         }
1774
1775         txr->next_avail_desc = i;
1776         txr->tx_avail -= nsegs;
1777
1778         tx_buffer->m_head = m_head;
1779         tx_buffer_mapped->map = tx_buffer->map;
1780         tx_buffer->map = map;
1781         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1782
1783         /*
1784          * Last Descriptor of Packet
1785          * needs End Of Packet (EOP)
1786          * and Report Status (RS)
1787          */
1788         txd->read.cmd_type_len |=
1789             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1790         /*
1791          * Keep track in the first buffer which
1792          * descriptor will be written back
1793          */
1794         tx_buffer = &txr->tx_buffers[first];
1795         tx_buffer->next_eop = last;
1796         txr->watchdog_time = ticks;
1797
1798         /*
1799          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1800          * that this frame is available to transmit.
1801          */
1802         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1803             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1804         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1805         ++txr->tx_packets;
1806
1807         return (0);
1808
1809 }
1810
1811 static void
1812 igb_set_promisc(struct adapter *adapter)
1813 {
1814         struct ifnet    *ifp = adapter->ifp;
1815         uint32_t        reg_rctl;
1816
1817         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1818
1819         if (ifp->if_flags & IFF_PROMISC) {
1820                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1821                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1822         } else if (ifp->if_flags & IFF_ALLMULTI) {
1823                 reg_rctl |= E1000_RCTL_MPE;
1824                 reg_rctl &= ~E1000_RCTL_UPE;
1825                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1826         }
1827 }
1828
1829 static void
1830 igb_disable_promisc(struct adapter *adapter)
1831 {
1832         uint32_t        reg_rctl;
1833
1834         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1835
1836         reg_rctl &=  (~E1000_RCTL_UPE);
1837         reg_rctl &=  (~E1000_RCTL_MPE);
1838         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1839 }
1840
1841
1842 /*********************************************************************
1843  *  Multicast Update
1844  *
1845  *  This routine is called whenever multicast address list is updated.
1846  *
1847  **********************************************************************/
1848
1849 static void
1850 igb_set_multi(struct adapter *adapter)
1851 {
1852         struct ifnet    *ifp = adapter->ifp;
1853         struct ifmultiaddr *ifma;
1854         u32 reg_rctl = 0;
1855         u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1856
1857         int mcnt = 0;
1858
1859         IOCTL_DEBUGOUT("igb_set_multi: begin");
1860
1861 #if __FreeBSD_version < 800000
1862         IF_ADDR_LOCK(ifp);
1863 #else
1864         if_maddr_rlock(ifp);
1865 #endif
1866         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1867                 if (ifma->ifma_addr->sa_family != AF_LINK)
1868                         continue;
1869
1870                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1871                         break;
1872
1873                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1874                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1875                 mcnt++;
1876         }
1877 #if __FreeBSD_version < 800000
1878         IF_ADDR_UNLOCK(ifp);
1879 #else
1880         if_maddr_runlock(ifp);
1881 #endif
1882
1883         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1884                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1885                 reg_rctl |= E1000_RCTL_MPE;
1886                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1887         } else
1888                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1889 }
1890
1891
1892 /*********************************************************************
1893  *  Timer routine:
1894  *      This routine checks for link status,
1895  *      updates statistics, and does the watchdog.
1896  *
1897  **********************************************************************/
1898
1899 static void
1900 igb_local_timer(void *arg)
1901 {
1902         struct adapter          *adapter = arg;
1903         struct ifnet            *ifp = adapter->ifp;
1904         device_t                dev = adapter->dev;
1905         struct tx_ring          *txr = adapter->tx_rings;
1906
1907
1908         IGB_CORE_LOCK_ASSERT(adapter);
1909
1910         igb_update_link_status(adapter);
1911         igb_update_stats_counters(adapter);
1912
1913         if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1914                 igb_print_hw_stats(adapter);
1915
1916         /*
1917         ** Watchdog: check for time since any descriptor was cleaned
1918         */
1919         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1920                 if (txr->watchdog_check == FALSE)
1921                         continue;
1922                 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1923                         goto timeout;
1924         }
1925
1926         /* Trigger an RX interrupt on all queues */
1927 #ifdef DEVICE_POLLING
1928         if (!(ifp->if_capenable & IFCAP_POLLING))
1929 #endif
1930         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1931         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1932         return;
1933
1934 timeout:
1935         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1936         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1937             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1938             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1939         device_printf(dev,"TX(%d) desc avail = %d,"
1940             "Next TX to Clean = %d\n",
1941             txr->me, txr->tx_avail, txr->next_to_clean);
1942         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1943         adapter->watchdog_events++;
1944         igb_init_locked(adapter);
1945 }
1946
1947 static void
1948 igb_update_link_status(struct adapter *adapter)
1949 {
1950         struct e1000_hw *hw = &adapter->hw;
1951         struct ifnet *ifp = adapter->ifp;
1952         device_t dev = adapter->dev;
1953         struct tx_ring *txr = adapter->tx_rings;
1954         u32 link_check = 0;
1955
1956         /* Get the cached link value or read for real */
1957         switch (hw->phy.media_type) {
1958         case e1000_media_type_copper:
1959                 if (hw->mac.get_link_status) {
1960                         /* Do the work to read phy */
1961                         e1000_check_for_link(hw);
1962                         link_check = !hw->mac.get_link_status;
1963                 } else
1964                         link_check = TRUE;
1965                 break;
1966         case e1000_media_type_fiber:
1967                 e1000_check_for_link(hw);
1968                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1969                                  E1000_STATUS_LU);
1970                 break;
1971         case e1000_media_type_internal_serdes:
1972                 e1000_check_for_link(hw);
1973                 link_check = adapter->hw.mac.serdes_has_link;
1974                 break;
1975         default:
1976         case e1000_media_type_unknown:
1977                 break;
1978         }
1979
1980         /* Now we check if a transition has happened */
1981         if (link_check && (adapter->link_active == 0)) {
1982                 e1000_get_speed_and_duplex(&adapter->hw, 
1983                     &adapter->link_speed, &adapter->link_duplex);
1984                 if (bootverbose)
1985                         device_printf(dev, "Link is up %d Mbps %s\n",
1986                             adapter->link_speed,
1987                             ((adapter->link_duplex == FULL_DUPLEX) ?
1988                             "Full Duplex" : "Half Duplex"));
1989                 adapter->link_active = 1;
1990                 ifp->if_baudrate = adapter->link_speed * 1000000;
1991                 /* This can sleep */
1992                 if_link_state_change(ifp, LINK_STATE_UP);
1993         } else if (!link_check && (adapter->link_active == 1)) {
1994                 ifp->if_baudrate = adapter->link_speed = 0;
1995                 adapter->link_duplex = 0;
1996                 if (bootverbose)
1997                         device_printf(dev, "Link is Down\n");
1998                 adapter->link_active = 0;
1999                 /* This can sleep */
2000                 if_link_state_change(ifp, LINK_STATE_DOWN);
2001                 /* Turn off watchdogs */
2002                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2003                         txr->watchdog_check = FALSE;
2004         }
2005 }
2006
2007 /*********************************************************************
2008  *
2009  *  This routine disables all traffic on the adapter by issuing a
2010  *  global reset on the MAC and deallocates TX/RX buffers.
2011  *
2012  **********************************************************************/
2013
2014 static void
2015 igb_stop(void *arg)
2016 {
2017         struct adapter  *adapter = arg;
2018         struct ifnet    *ifp = adapter->ifp;
2019         struct tx_ring *txr = adapter->tx_rings;
2020
2021         IGB_CORE_LOCK_ASSERT(adapter);
2022
2023         INIT_DEBUGOUT("igb_stop: begin");
2024
2025         igb_disable_intr(adapter);
2026
2027         callout_stop(&adapter->timer);
2028
2029         /* Tell the stack that the interface is no longer active */
2030         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2031
2032         /* Unarm watchdog timer. */
2033         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2034                 IGB_TX_LOCK(txr);
2035                 txr->watchdog_check = FALSE;
2036                 IGB_TX_UNLOCK(txr);
2037         }
2038
2039         e1000_reset_hw(&adapter->hw);
2040         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2041
2042         e1000_led_off(&adapter->hw);
2043         e1000_cleanup_led(&adapter->hw);
2044 }
2045
2046
2047 /*********************************************************************
2048  *
2049  *  Determine hardware revision.
2050  *
2051  **********************************************************************/
2052 static void
2053 igb_identify_hardware(struct adapter *adapter)
2054 {
2055         device_t dev = adapter->dev;
2056
2057         /* Make sure our PCI config space has the necessary stuff set */
2058         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2059         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2060             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2061                 device_printf(dev, "Memory Access and/or Bus Master bits "
2062                     "were not set!\n");
2063                 adapter->hw.bus.pci_cmd_word |=
2064                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2065                 pci_write_config(dev, PCIR_COMMAND,
2066                     adapter->hw.bus.pci_cmd_word, 2);
2067         }
2068
2069         /* Save off the information about this board */
2070         adapter->hw.vendor_id = pci_get_vendor(dev);
2071         adapter->hw.device_id = pci_get_device(dev);
2072         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2073         adapter->hw.subsystem_vendor_id =
2074             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2075         adapter->hw.subsystem_device_id =
2076             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2077
2078         /* Do Shared Code Init and Setup */
2079         if (e1000_set_mac_type(&adapter->hw)) {
2080                 device_printf(dev, "Setup init failure\n");
2081                 return;
2082         }
2083 }
2084
2085 static int
2086 igb_allocate_pci_resources(struct adapter *adapter)
2087 {
2088         device_t        dev = adapter->dev;
2089         int             rid;
2090
2091         rid = PCIR_BAR(0);
2092         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2093             &rid, RF_ACTIVE);
2094         if (adapter->pci_mem == NULL) {
2095                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2096                 return (ENXIO);
2097         }
2098         adapter->osdep.mem_bus_space_tag =
2099             rman_get_bustag(adapter->pci_mem);
2100         adapter->osdep.mem_bus_space_handle =
2101             rman_get_bushandle(adapter->pci_mem);
2102         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2103
2104         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2105
2106         /* This will setup either MSI/X or MSI */
2107         adapter->msix = igb_setup_msix(adapter);
2108         adapter->hw.back = &adapter->osdep;
2109
2110         return (0);
2111 }
2112
2113 /*********************************************************************
2114  *
2115  *  Setup the Legacy or MSI Interrupt handler
2116  *
2117  **********************************************************************/
2118 static int
2119 igb_allocate_legacy(struct adapter *adapter)
2120 {
2121         device_t                dev = adapter->dev;
2122         struct igb_queue        *que = adapter->queues;
2123         int                     error, rid = 0;
2124
2125         /* Turn off all interrupts */
2126         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2127
2128         /* MSI RID is 1 */
2129         if (adapter->msix == 1)
2130                 rid = 1;
2131
2132         /* We allocate a single interrupt resource */
2133         adapter->res = bus_alloc_resource_any(dev,
2134             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2135         if (adapter->res == NULL) {
2136                 device_printf(dev, "Unable to allocate bus resource: "
2137                     "interrupt\n");
2138                 return (ENXIO);
2139         }
2140
2141         /*
2142          * Try allocating a fast interrupt and the associated deferred
2143          * processing contexts.
2144          */
2145         TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, que);
2146         /* Make tasklet for deferred link handling */
2147         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2148         adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2149             taskqueue_thread_enqueue, &adapter->tq);
2150         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2151             device_get_nameunit(adapter->dev));
2152         if ((error = bus_setup_intr(dev, adapter->res,
2153             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2154             adapter, &adapter->tag)) != 0) {
2155                 device_printf(dev, "Failed to register fast interrupt "
2156                             "handler: %d\n", error);
2157                 taskqueue_free(adapter->tq);
2158                 adapter->tq = NULL;
2159                 return (error);
2160         }
2161
2162         return (0);
2163 }
2164
2165
2166 /*********************************************************************
2167  *
2168  *  Setup the MSIX Queue Interrupt handlers
2169  *
2170  **********************************************************************/
2171 static int
2172 igb_allocate_msix(struct adapter *adapter)
2173 {
2174         device_t                dev = adapter->dev;
2175         struct igb_queue        *que = adapter->queues;
2176         int                     error, rid, vector = 0;
2177
2178
2179         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2180                 rid = vector + 1;
2181                 que->res = bus_alloc_resource_any(dev,
2182                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2183                 if (que->res == NULL) {
2184                         device_printf(dev,
2185                             "Unable to allocate bus resource: "
2186                             "MSIX Queue Interrupt\n");
2187                         return (ENXIO);
2188                 }
2189                 error = bus_setup_intr(dev, que->res,
2190                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2191                     igb_msix_que, que, &que->tag);
2192                 if (error) {
2193                         que->res = NULL;
2194                         device_printf(dev, "Failed to register Queue handler");
2195                         return (error);
2196                 }
2197                 que->msix = vector;
2198                 if (adapter->hw.mac.type == e1000_82575)
2199                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2200                 else
2201                         que->eims = 1 << vector;
2202                 /*
2203                 ** Bind the msix vector, and thus the
2204                 ** rings to the corresponding cpu.
2205                 */
2206                 if (adapter->num_queues > 1)
2207                         bus_bind_intr(dev, que->res, i);
2208                 /* Make tasklet for deferred handling */
2209                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2210                 que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2211                     taskqueue_thread_enqueue, &que->tq);
2212                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2213                     device_get_nameunit(adapter->dev));
2214         }
2215
2216         /* And Link */
2217         rid = vector + 1;
2218         adapter->res = bus_alloc_resource_any(dev,
2219             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2220         if (adapter->res == NULL) {
2221                 device_printf(dev,
2222                     "Unable to allocate bus resource: "
2223                     "MSIX Link Interrupt\n");
2224                 return (ENXIO);
2225         }
2226         if ((error = bus_setup_intr(dev, adapter->res,
2227             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2228             igb_msix_link, adapter, &adapter->tag)) != 0) {
2229                 device_printf(dev, "Failed to register Link handler");
2230                 return (error);
2231         }
2232         adapter->linkvec = vector;
2233
2234         /* Make tasklet for deferred handling */
2235         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2236         adapter->tq = taskqueue_create_fast("igb_link", M_NOWAIT,
2237             taskqueue_thread_enqueue, &adapter->tq);
2238         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s link",
2239             device_get_nameunit(adapter->dev));
2240
2241         return (0);
2242 }
2243
2244
2245 static void
2246 igb_configure_queues(struct adapter *adapter)
2247 {
2248         struct  e1000_hw        *hw = &adapter->hw;
2249         struct  igb_queue       *que;
2250         u32                     tmp, ivar = 0;
2251         u32                     newitr = IGB_DEFAULT_ITR;
2252
2253         /* First turn on RSS capability */
2254         if (adapter->hw.mac.type > e1000_82575)
2255                 E1000_WRITE_REG(hw, E1000_GPIE,
2256                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2257                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2258
2259         /* Turn on MSIX */
2260         switch (adapter->hw.mac.type) {
2261         case e1000_82580:
2262                 /* RX entries */
2263                 for (int i = 0; i < adapter->num_queues; i++) {
2264                         u32 index = i >> 1;
2265                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2266                         que = &adapter->queues[i];
2267                         if (i & 1) {
2268                                 ivar &= 0xFF00FFFF;
2269                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2270                         } else {
2271                                 ivar &= 0xFFFFFF00;
2272                                 ivar |= que->msix | E1000_IVAR_VALID;
2273                         }
2274                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2275                 }
2276                 /* TX entries */
2277                 for (int i = 0; i < adapter->num_queues; i++) {
2278                         u32 index = i >> 1;
2279                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2280                         que = &adapter->queues[i];
2281                         if (i & 1) {
2282                                 ivar &= 0x00FFFFFF;
2283                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2284                         } else {
2285                                 ivar &= 0xFFFF00FF;
2286                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2287                         }
2288                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2289                         adapter->eims_mask |= que->eims;
2290                 }
2291
2292                 /* And for the link interrupt */
2293                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2294                 adapter->link_mask = 1 << adapter->linkvec;
2295                 adapter->eims_mask |= adapter->link_mask;
2296                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2297                 break;
2298         case e1000_82576:
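                /*
                ** Layout assumed by the shifts and masks below: each
                ** 32-bit IVAR register holds four 8-bit vector entries,
                ** and queue i shares register (i & 0x7) with queue i + 8:
                **   byte 0: RX queue i       byte 1: TX queue i     (i < 8)
                **   byte 2: RX queue i + 8   byte 3: TX queue i + 8
                */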
2299                 /* RX entries */
2300                 for (int i = 0; i < adapter->num_queues; i++) {
2301                         u32 index = i & 0x7; /* Each IVAR has two entries */
2302                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2303                         que = &adapter->queues[i];
2304                         if (i < 8) {
2305                                 ivar &= 0xFFFFFF00;
2306                                 ivar |= que->msix | E1000_IVAR_VALID;
2307                         } else {
2308                                 ivar &= 0xFF00FFFF;
2309                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2310                         }
2311                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2312                         adapter->eims_mask |= que->eims;
2313                 }
2314                 /* TX entries */
2315                 for (int i = 0; i < adapter->num_queues; i++) {
2316                         u32 index = i & 0x7; /* Each IVAR has two entries */
2317                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2318                         que = &adapter->queues[i];
2319                         if (i < 8) {
2320                                 ivar &= 0xFFFF00FF;
2321                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2322                         } else {
2323                                 ivar &= 0x00FFFFFF;
2324                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2325                         }
2326                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2327                         adapter->eims_mask |= que->eims;
2328                 }
2329
2330                 /* And for the link interrupt */
2331                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2332                 adapter->link_mask = 1 << adapter->linkvec;
2333                 adapter->eims_mask |= adapter->link_mask;
2334                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2335                 break;
2336
2337         case e1000_82575:
2338                 /* enable MSI-X support*/
2339                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2340                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2341                 /* Auto-Mask interrupts upon ICR read. */
2342                 tmp |= E1000_CTRL_EXT_EIAME;
2343                 tmp |= E1000_CTRL_EXT_IRCA;
2344                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2345
2346                 /* Queues */
2347                 for (int i = 0; i < adapter->num_queues; i++) {
2348                         que = &adapter->queues[i];
2349                         tmp = E1000_EICR_RX_QUEUE0 << i;
2350                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2351                         que->eims = tmp;
2352                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2353                             i, que->eims);
2354                         adapter->eims_mask |= que->eims;
2355                 }
2356
2357                 /* Link */
2358                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2359                     E1000_EIMS_OTHER);
2360                 adapter->link_mask |= E1000_EIMS_OTHER;
2361                 adapter->eims_mask |= adapter->link_mask;
                break;
2362         default:
2363                 break;
2364         }
2365
2366         /* Set the starting interrupt rate */
2367         if (hw->mac.type == e1000_82575)
2368                 newitr |= newitr << 16;
2369         else
2370                 newitr |= E1000_EITR_CNT_IGNR;
2371
2372         for (int i = 0; i < adapter->num_queues; i++) {
2373                 que = &adapter->queues[i];
2374                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2375         }
2376
2377         return;
2378 }
2379
2380
2381 static void
2382 igb_free_pci_resources(struct adapter *adapter)
2383 {
2384         struct          igb_queue *que = adapter->queues;
2385         device_t        dev = adapter->dev;
2386         int             rid;
2387
2388         /*
2389         ** There is a slight possibility of a failure mode
2390         ** in attach that will result in entering this function
2391         ** before interrupt resources have been initialized, and
2392         ** in that case we do not want to execute the loops below.
2393         ** We can detect this reliably by the state of the adapter's
2394         ** res pointer.
2395         */
2396         if (adapter->res == NULL)
2397                 goto mem;
2398
2399         /*
2400          * First release all the interrupt resources:
2401          */
2402         for (int i = 0; i < adapter->num_queues; i++, que++) {
2403                 rid = que->msix + 1;
2404                 if (que->tag != NULL) {
2405                         bus_teardown_intr(dev, que->res, que->tag);
2406                         que->tag = NULL;
2407                 }
2408                 if (que->res != NULL)
2409                         bus_release_resource(dev,
2410                             SYS_RES_IRQ, rid, que->res);
2411         }
2412
2413         /* Clean the Legacy or Link interrupt last */
2414         if (adapter->linkvec) /* we are doing MSIX */
2415                 rid = adapter->linkvec + 1;
2416         else
2417                 rid = (adapter->msix != 0) ? 1 : 0;
2418
2419         if (adapter->tag != NULL) {
2420                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2421                 adapter->tag = NULL;
2422         }
2423         if (adapter->res != NULL)
2424                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2425
2426 mem:
2427         if (adapter->msix)
2428                 pci_release_msi(dev);
2429
2430         if (adapter->msix_mem != NULL)
2431                 bus_release_resource(dev, SYS_RES_MEMORY,
2432                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2433
2434         if (adapter->pci_mem != NULL)
2435                 bus_release_resource(dev, SYS_RES_MEMORY,
2436                     PCIR_BAR(0), adapter->pci_mem);
2437
2438 }
2439
2440 /*
2441  * Setup Either MSI/X or MSI
2442  */
2443 static int
2444 igb_setup_msix(struct adapter *adapter)
2445 {
2446         device_t dev = adapter->dev;
2447         int rid, want, queues, msgs;
2448
2449         /* tuneable override */
2450         if (igb_enable_msix == 0)
2451                 goto msi;
2452
2453         /* First try MSI/X */
2454         rid = PCIR_BAR(IGB_MSIX_BAR);
2455         adapter->msix_mem = bus_alloc_resource_any(dev,
2456             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2457         if (!adapter->msix_mem) {
2458                 /* May not be enabled */
2459                 device_printf(adapter->dev,
2460                     "Unable to map MSIX table \n");
2461                 goto msi;
2462         }
2463
2464         msgs = pci_msix_count(dev); 
2465         if (msgs == 0) { /* system has msix disabled */
2466                 bus_release_resource(dev, SYS_RES_MEMORY,
2467                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2468                 adapter->msix_mem = NULL;
2469                 goto msi;
2470         }
2471
2472         /* Figure out a reasonable auto config value */
2473         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2474
2475         /* Manual override */
2476         if (igb_num_queues != 0)
2477                 queues = igb_num_queues;
2478
2479         /* Can have max of 4 queues on 82575 */
2480         if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2481                 queues = 4;
2482
2483         /*
2484         ** One vector (RX/TX pair) per queue
2485         ** plus one additional vector for the link interrupt
2486         */
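        /*
        ** e.g. on a hypothetical 4-core box with 10 MSIX messages
        ** available and no overrides: queues = 4, want = 5, and msgs
        ** is trimmed from 10 down to 5 before pci_alloc_msix().
        */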
2487         want = queues + 1;
2488         if (msgs >= want)
2489                 msgs = want;
2490         else {
2491                 device_printf(adapter->dev,
2492                     "MSIX Configuration Problem, "
2493                     "%d vectors configured, but %d queues wanted!\n",
2494                     msgs, want);
2495                 return (ENXIO);
2496         }
2497         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2498                 device_printf(adapter->dev,
2499                     "Using MSIX interrupts with %d vectors\n", msgs);
2500                 adapter->num_queues = queues;
2501                 return (msgs);
2502         }
2503 msi:
2504         msgs = pci_msi_count(dev);
2505         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2506                 device_printf(adapter->dev,"Using MSI interrupt\n");
2507         return (msgs);
2508 }
2509
2510 /*********************************************************************
2511  *
2512  *  Set up a fresh starting state
2513  *
2514  **********************************************************************/
2515 static void
2516 igb_reset(struct adapter *adapter)
2517 {
2518         device_t        dev = adapter->dev;
2519         struct e1000_hw *hw = &adapter->hw;
2520         struct e1000_fc_info *fc = &hw->fc;
2521         struct ifnet    *ifp = adapter->ifp;
2522         u32             pba = 0;
2523         u16             hwm;
2524
2525         INIT_DEBUGOUT("igb_reset: begin");
2526
2527         /* Let the firmware know the OS is in control */
2528         igb_get_hw_control(adapter);
2529
2530         /*
2531          * Packet Buffer Allocation (PBA)
2532          * Writing PBA sets the receive portion of the buffer;
2533          * the remainder is used for the transmit buffer.
2534          */
2535         switch (hw->mac.type) {
2536         case e1000_82575:
2537                 pba = E1000_PBA_32K;
2538                 break;
2539         case e1000_82576:
2540                 pba = E1000_PBA_64K;
2541                 break;
2542         case e1000_82580:
2543                 pba = E1000_PBA_35K;
                break;
2544         default:
2545                 break;
2546         }
2547
2548         /* Special needs in case of Jumbo frames */
2549         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2550                 u32 tx_space, min_tx, min_rx;
2551                 pba = E1000_READ_REG(hw, E1000_PBA);
2552                 tx_space = pba >> 16;
2553                 pba &= 0xffff;
2554                 min_tx = (adapter->max_frame_size +
2555                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2556                 min_tx = roundup2(min_tx, 1024);
2557                 min_tx >>= 10;
2558                 min_rx = adapter->max_frame_size;
2559                 min_rx = roundup2(min_rx, 1024);
2560                 min_rx >>= 10;
2561                 if (tx_space < min_tx &&
2562                     ((min_tx - tx_space) < pba)) {
2563                         pba = pba - (min_tx - tx_space);
2564                         /*
2565                          * if short on rx space, rx wins
2566                          * and must trump tx adjustment
2567                          */
2568                         if (pba < min_rx)
2569                                 pba = min_rx;
2570                 }
2571                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2572         }
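        /*
        ** Worked example with assumed sizes: for max_frame_size = 9216
        ** and the 16-byte legacy TX descriptor, min_tx = (9216 + 16 - 4)
        ** * 2 = 18456 bytes, rounded up to 19 KB; min_rx = 9216 bytes is
        ** exactly 9 KB. If the TX space read back from E1000_PBA falls
        ** short, the RX share shrinks by the difference, never below
        ** min_rx.
        */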
2573
2574         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2575
2576         /*
2577          * These parameters control the automatic generation (Tx) and
2578          * response (Rx) to Ethernet PAUSE frames.
2579          * - High water mark should allow for at least two frames to be
2580          *   received after sending an XOFF.
2581          * - Low water mark works best when it is very near the high water mark.
2582          *   This allows the receiver to restart by sending XON when it has
2583          *   drained a bit.
2584          */
2585         hwm = min(((pba << 10) * 9 / 10),
2586             ((pba << 10) - 2 * adapter->max_frame_size));
2587
2588         if (hw->mac.type < e1000_82576) {
2589                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2590                 fc->low_water = fc->high_water - 8;
2591         } else {
2592                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2593                 fc->low_water = fc->high_water - 16;
2594         }
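        /*
        ** Worked example with assumed values: pba = 64 on an 82576 and
        ** a 1522-byte max frame give pba << 10 = 65536 bytes, so hwm =
        ** min(58982, 62492) = 58982; at 16-byte granularity the high
        ** water mark becomes 58976 and the low water mark 58960.
        */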
2595
2596         fc->pause_time = IGB_FC_PAUSE_TIME;
2597         fc->send_xon = TRUE;
2598
2599         /* Set Flow control, use the tunable location if sane */
2600         if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2601                 fc->requested_mode = igb_fc_setting;
2602         else
2603                 fc->requested_mode = e1000_fc_none;
2604
2605         fc->current_mode = fc->requested_mode;
2606
2607         /* Issue a global reset */
2608         e1000_reset_hw(hw);
2609         E1000_WRITE_REG(hw, E1000_WUC, 0);
2610
2611         if (e1000_init_hw(hw) < 0)
2612                 device_printf(dev, "Hardware Initialization Failed\n");
2613
2614         if (hw->mac.type == e1000_82580) {
2615                 u32 reg;
2616
2617                 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2618                 /*
2619                  * 0x80000000 - enable DMA COAL
2620                  * 0x10000000 - use L0s as low power
2621                  * 0x20000000 - use L1 as low power
2622                  * X << 16 - exit dma coal when rx data exceeds X kB
2623                  * Y - upper limit to stay in dma coal in units of 32usecs
2624                  */
2625                 E1000_WRITE_REG(hw, E1000_DMACR,
2626                     0xA0000006 | ((hwm << 6) & 0x00FF0000));
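                /*
                 * Decoding the constant against the bit map above:
                 * 0xA0000006 = 0x80000000 (enable DMA coalescing)
                 *            | 0x20000000 (use L1 as the low power state)
                 *            | 6 (stay in coalescing at most 6 * 32 usecs);
                 * (hwm << 6) & 0x00FF0000 supplies X, the RX-data exit
                 * threshold, which works out to roughly hwm in kB.
                 */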
2627
2628                 /* set hwm to PBA -  2 * max frame size */
2629                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2630                 /*
2631                  * This sets the time to wait before requesting transition to
2632                  * low power state to number of usecs needed to receive 1 512
2633                  * byte frame at gigabit line rate
2634                  */
2635                 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2636
2637                 /* free space in tx packet buffer to wake from DMA coal */
2638                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2639                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2640
2641                 /* make low power state decision controlled by DMA coal */
2642                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2643                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2644                     reg | E1000_PCIEMISC_LX_DECISION);
2645         }
2646
2647         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2648         e1000_get_phy_info(hw);
2649         e1000_check_for_link(hw);
2650         return;
2651 }
2652
2653 /*********************************************************************
2654  *
2655  *  Setup networking device structure and register an interface.
2656  *
2657  **********************************************************************/
2658 static void
2659 igb_setup_interface(device_t dev, struct adapter *adapter)
2660 {
2661         struct ifnet   *ifp;
2662
2663         INIT_DEBUGOUT("igb_setup_interface: begin");
2664
2665         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2666         if (ifp == NULL)
2667                 panic("%s: can not if_alloc()", device_get_nameunit(dev));
2668         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2669         ifp->if_mtu = ETHERMTU;
2670         ifp->if_init =  igb_init;
2671         ifp->if_softc = adapter;
2672         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2673         ifp->if_ioctl = igb_ioctl;
2674         ifp->if_start = igb_start;
2675 #if __FreeBSD_version >= 800000
2676         ifp->if_transmit = igb_mq_start;
2677         ifp->if_qflush = igb_qflush;
2678 #endif
2679         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2680         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2681         IFQ_SET_READY(&ifp->if_snd);
2682
2683         ether_ifattach(ifp, adapter->hw.mac.addr);
2684
2685         ifp->if_capabilities = ifp->if_capenable = 0;
2686
2687         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2688         ifp->if_capabilities |= IFCAP_TSO4;
2689         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2690         if (igb_header_split)
2691                 ifp->if_capabilities |= IFCAP_LRO;
2692
2693         ifp->if_capenable = ifp->if_capabilities;
2694 #ifdef DEVICE_POLLING
2695         ifp->if_capabilities |= IFCAP_POLLING;
2696 #endif
2697
2698         /*
2699          * Tell the upper layer(s) we
2700          * support full VLAN capability.
2701          */
2702         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2703         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2704         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2705
2706         /*
2707         ** Don't turn this on by default: if vlans are
2708         ** created on another pseudo device (e.g. lagg),
2709         ** then vlan events are not passed through, breaking
2710         ** operation, but with HW FILTER off it works. If
2711         ** you use vlans directly on the igb driver you can
2712         ** enable this and get full hardware tag filtering.
2713         */
2714         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
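        /*
        ** Illustrative userland toggle (assumed interface name):
        ** "ifconfig igb0 vlanhwfilter" moves IFCAP_VLAN_HWFILTER into
        ** if_capenable for setups that run vlans directly on igb.
        */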
2715
2716         /*
2717          * Specify the media types supported by this adapter and register
2718          * callbacks to update media and link information
2719          */
2720         ifmedia_init(&adapter->media, IFM_IMASK,
2721             igb_media_change, igb_media_status);
2722         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2723             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2724                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2725                             0, NULL);
2726                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2727         } else {
2728                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2729                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2730                             0, NULL);
2731                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2732                             0, NULL);
2733                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2734                             0, NULL);
2735                 if (adapter->hw.phy.type != e1000_phy_ife) {
2736                         ifmedia_add(&adapter->media,
2737                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2738                         ifmedia_add(&adapter->media,
2739                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2740                 }
2741         }
2742         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2743         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2744 }
2745
2746
2747 /*
2748  * Manage DMA'able memory.
2749  */
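/*
 * The helpers below follow the usual three-step busdma pattern:
 * bus_dma_tag_create() describes the constraints, bus_dmamem_alloc()
 * obtains DMA-safe memory, and bus_dmamap_load() resolves the bus
 * address, which igb_dmamap_cb() stores into dma->dma_paddr.
 */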
2750 static void
2751 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2752 {
2753         if (error)
2754                 return;
2755         *(bus_addr_t *) arg = segs[0].ds_addr;
2756 }
2757
2758 static int
2759 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2760         struct igb_dma_alloc *dma, int mapflags)
2761 {
2762         int error;
2763
2764         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2765                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2766                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2767                                 BUS_SPACE_MAXADDR,      /* highaddr */
2768                                 NULL, NULL,             /* filter, filterarg */
2769                                 size,                   /* maxsize */
2770                                 1,                      /* nsegments */
2771                                 size,                   /* maxsegsize */
2772                                 0,                      /* flags */
2773                                 NULL,                   /* lockfunc */
2774                                 NULL,                   /* lockarg */
2775                                 &dma->dma_tag);
2776         if (error) {
2777                 device_printf(adapter->dev,
2778                     "%s: bus_dma_tag_create failed: %d\n",
2779                     __func__, error);
2780                 goto fail_0;
2781         }
2782
2783         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2784             BUS_DMA_NOWAIT, &dma->dma_map);
2785         if (error) {
2786                 device_printf(adapter->dev,
2787                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2788                     __func__, (uintmax_t)size, error);
2789                 goto fail_1;    /* tag created, but nothing allocated */
2790         }
2791
2792         dma->dma_paddr = 0;
2793         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2794             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2795         if (error || dma->dma_paddr == 0) {
2796                 device_printf(adapter->dev,
2797                     "%s: bus_dmamap_load failed: %d\n",
2798                     __func__, error);
2799                 goto fail_3;
2800         }
2801
2802         return (0);
2803
2804 fail_3:
2805         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2807         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2808         bus_dma_tag_destroy(dma->dma_tag);
2809 fail_0:
2810         dma->dma_map = NULL;
2811         dma->dma_tag = NULL;
2812
2813         return (error);
2814 }
2815
2816 static void
2817 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2818 {
2819         if (dma->dma_tag == NULL)
2820                 return;
2821         if (dma->dma_map != NULL) {
2822                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2823                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2824                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2825                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2826                 dma->dma_map = NULL;
2827         }
2828         bus_dma_tag_destroy(dma->dma_tag);
2829         dma->dma_tag = NULL;
2830 }
2831
2832
2833 /*********************************************************************
2834  *
2835  *  Allocate memory for the transmit and receive rings, and then
2836  *  the descriptors associated with each, called only once at attach.
2837  *
2838  **********************************************************************/
2839 static int
2840 igb_allocate_queues(struct adapter *adapter)
2841 {
2842         device_t dev = adapter->dev;
2843         struct igb_queue        *que = NULL;
2844         struct tx_ring          *txr = NULL;
2845         struct rx_ring          *rxr = NULL;
2846         int rsize, tsize, error = E1000_SUCCESS;
2847         int txconf = 0, rxconf = 0;
2848
2849         /* First allocate the top level queue structs */
2850         if (!(adapter->queues =
2851             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2852             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2853                 device_printf(dev, "Unable to allocate queue memory\n");
2854                 error = ENOMEM;
2855                 goto fail;
2856         }
2857
2858         /* Next allocate the TX ring struct memory */
2859         if (!(adapter->tx_rings =
2860             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2861             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2862                 device_printf(dev, "Unable to allocate TX ring memory\n");
2863                 error = ENOMEM;
2864                 goto tx_fail;
2865         }
2866
2867         /* Now allocate the RX */
2868         if (!(adapter->rx_rings =
2869             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2870             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2871                 device_printf(dev, "Unable to allocate RX ring memory\n");
2872                 error = ENOMEM;
2873                 goto rx_fail;
2874         }
2875
2876         tsize = roundup2(adapter->num_tx_desc *
2877             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2878         /*
2879          * Now set up the TX queues; txconf tracks how many have
2880          * been configured so that, should something fail midcourse,
2881          * the completed allocations can be undone gracefully.
2882          */
2883         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2884                 /* Set up some basics */
2885                 txr = &adapter->tx_rings[i];
2886                 txr->adapter = adapter;
2887                 txr->me = i;
2888
2889                 /* Initialize the TX lock */
2890                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2891                     device_get_nameunit(dev), txr->me);
2892                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2893
2894                 if (igb_dma_malloc(adapter, tsize,
2895                         &txr->txdma, BUS_DMA_NOWAIT)) {
2896                         device_printf(dev,
2897                             "Unable to allocate TX Descriptor memory\n");
2898                         error = ENOMEM;
2899                         goto err_tx_desc;
2900                 }
2901                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2902                 bzero((void *)txr->tx_base, tsize);
2903
2904                 /* Now allocate transmit buffers for the ring */
2905                 if (igb_allocate_transmit_buffers(txr)) {
2906                         device_printf(dev,
2907                             "Critical Failure setting up transmit buffers\n");
2908                         error = ENOMEM;
2909                         goto err_tx_desc;
2910                 }
2911 #if __FreeBSD_version >= 800000
2912                 /* Allocate a buf ring */
2913                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2914                     M_WAITOK, &txr->tx_mtx);
2915 #endif
2916         }
2917
2918         /*
2919          * Next the RX queues...
2920          */ 
2921         rsize = roundup2(adapter->num_rx_desc *
2922             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2923         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2924                 rxr = &adapter->rx_rings[i];
2925                 rxr->adapter = adapter;
2926                 rxr->me = i;
2927
2928                 /* Initialize the RX lock */
2929                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2930                     device_get_nameunit(dev), rxr->me);
2931                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2932
2933                 if (igb_dma_malloc(adapter, rsize,
2934                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2935                         device_printf(dev,
2936                             "Unable to allocate RX Descriptor memory\n");
2937                         error = ENOMEM;
2938                         goto err_rx_desc;
2939                 }
2940                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2941                 bzero((void *)rxr->rx_base, rsize);
2942
2943                 /* Allocate receive buffers for the ring */
2944                 if (igb_allocate_receive_buffers(rxr)) {
2945                         device_printf(dev,
2946                             "Critical Failure setting up receive buffers\n");
2947                         error = ENOMEM;
2948                         goto err_rx_desc;
2949                 }
2950         }
2951
2952         /*
2953         ** Finally set up the queue holding structs
2954         */
2955         for (int i = 0; i < adapter->num_queues; i++) {
2956                 que = &adapter->queues[i];
2957                 que->adapter = adapter;
2958                 que->txr = &adapter->tx_rings[i];
2959                 que->rxr = &adapter->rx_rings[i];
2960         }
2961
2962         return (0);
2963
2964 err_rx_desc:
2965         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2966                 igb_dma_free(adapter, &rxr->rxdma);
2967 err_tx_desc:
2968         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
2969 #if __FreeBSD_version >= 800000
2970                 buf_ring_free(txr->br, M_DEVBUF);
2971 #endif
2972                 igb_dma_free(adapter, &txr->txdma);
2973         }
2974         free(adapter->rx_rings, M_DEVBUF);
2975 rx_fail:
2976         free(adapter->tx_rings, M_DEVBUF);
2976 tx_fail:
2977         free(adapter->queues, M_DEVBUF);
2978 fail:
2979         return (error);
2980 }
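/*
 * Illustrative only: igb_allocate_queues() above uses the classic
 * goto-unwind idiom, releasing resources in reverse order of
 * acquisition. The shape of the pattern, with hypothetical names:
 */
#if 0
        if ((a = malloc(size_a, M_DEVBUF, M_NOWAIT)) == NULL)
                goto fail;
        if ((b = malloc(size_b, M_DEVBUF, M_NOWAIT)) == NULL)
                goto fail_a;
        return (0);
fail_a:
        free(a, M_DEVBUF);
fail:
        return (ENOMEM);
#endif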
2981
2982 /*********************************************************************
2983  *
2984  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2985  *  the information needed to transmit a packet on the wire. This is
2986  *  called only once at attach, setup is done every reset.
2987  *
2988  **********************************************************************/
2989 static int
2990 igb_allocate_transmit_buffers(struct tx_ring *txr)
2991 {
2992         struct adapter *adapter = txr->adapter;
2993         device_t dev = adapter->dev;
2994         struct igb_tx_buffer *txbuf;
2995         int error, i;
2996
2997         /*
2998          * Setup DMA descriptor areas.
2999          */
3000         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3001                                1, 0,                    /* alignment, bounds */
3002                                BUS_SPACE_MAXADDR,       /* lowaddr */
3003                                BUS_SPACE_MAXADDR,       /* highaddr */
3004                                NULL, NULL,              /* filter, filterarg */
3005                                IGB_TSO_SIZE,            /* maxsize */
3006                                IGB_MAX_SCATTER,         /* nsegments */
3007                                PAGE_SIZE,               /* maxsegsize */
3008                                0,                       /* flags */
3009                                NULL,                    /* lockfunc */
3010                                NULL,                    /* lockfuncarg */
3011                                &txr->txtag))) {
3012                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3013                 goto fail;
3014         }
3015
3016         if (!(txr->tx_buffers =
3017             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3018             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3019                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3020                 error = ENOMEM;
3021                 goto fail;
3022         }
3023
3024         /* Create the descriptor buffer dma maps */
3025         txbuf = txr->tx_buffers;
3026         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3027                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3028                 if (error != 0) {
3029                         device_printf(dev, "Unable to create TX DMA map\n");
3030                         goto fail;
3031                 }
3032         }
3033
3034         return 0;
3035 fail:
3036         /* Free everything; this also handles a partially-completed setup */
3037         igb_free_transmit_structures(adapter);
3038         return (error);
3039 }
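/*
 * Illustrative only: given the tag created above, a single
 * bus_dmamap_load_mbuf_sg() may return up to IGB_MAX_SCATTER segments,
 * none longer than PAGE_SIZE, for a chain of at most IGB_TSO_SIZE
 * bytes ("m_head" below is a hypothetical mbuf chain):
 */
#if 0
        bus_dma_segment_t segs[IGB_MAX_SCATTER];
        int error, nsegs;

        error = bus_dmamap_load_mbuf_sg(txr->txtag, txbuf->map,
            m_head, segs, &nsegs, BUS_DMA_NOWAIT);
#endif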
3040
3041 /*********************************************************************
3042  *
3043  *  Initialize a transmit ring.
3044  *
3045  **********************************************************************/
3046 static void
3047 igb_setup_transmit_ring(struct tx_ring *txr)
3048 {
3049         struct adapter *adapter = txr->adapter;
3050         struct igb_tx_buffer *txbuf;
3051         int i;
3052
3053         /* Clear the old descriptor contents */
3054         IGB_TX_LOCK(txr);
3055         bzero((void *)txr->tx_base,
3056               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3057         /* Reset indices */
3058         txr->next_avail_desc = 0;
3059         txr->next_to_clean = 0;
3060
3061         /* Free any existing tx buffers. */
3062         txbuf = txr->tx_buffers;
3063         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3064                 if (txbuf->m_head != NULL) {
3065                         bus_dmamap_sync(txr->txtag, txbuf->map,
3066                             BUS_DMASYNC_POSTWRITE);
3067                         bus_dmamap_unload(txr->txtag, txbuf->map);
3068                         m_freem(txbuf->m_head);
3069                         txbuf->m_head = NULL;
3070                 }
3071                 /* clear the watch index */
3072                 txbuf->next_eop = -1;
3073         }
3074
3075         /* Set number of descriptors available */
3076         txr->tx_avail = adapter->num_tx_desc;
3077
3078         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3079             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3080         IGB_TX_UNLOCK(txr);
3081 }
3082
3083 /*********************************************************************
3084  *
3085  *  Initialize all transmit rings.
3086  *
3087  **********************************************************************/
3088 static void
3089 igb_setup_transmit_structures(struct adapter *adapter)
3090 {
3091         struct tx_ring *txr = adapter->tx_rings;
3092
3093         for (int i = 0; i < adapter->num_queues; i++, txr++)
3094                 igb_setup_transmit_ring(txr);
3095
3096         return;
3097 }
3098
3099 /*********************************************************************
3100  *
3101  *  Enable transmit unit.
3102  *
3103  **********************************************************************/
3104 static void
3105 igb_initialize_transmit_units(struct adapter *adapter)
3106 {
3107         struct tx_ring  *txr = adapter->tx_rings;
3108         struct e1000_hw *hw = &adapter->hw;
3109         u32             tctl, txdctl;
3110
3111         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3112
3113         /* Setup the Tx Descriptor Rings */
3114         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3115                 u64 bus_addr = txr->txdma.dma_paddr;
3116
3117                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3118                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3119                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3120                     (uint32_t)(bus_addr >> 32));
3121                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3122                     (uint32_t)bus_addr);
3123
3124                 /* Setup the HW Tx Head and Tail descriptor pointers */
3125                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3126                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3127
3128                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3129                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3130                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3131
3132                 txr->watchdog_check = FALSE;
3133
3134                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3135                 txdctl |= IGB_TX_PTHRESH;
3136                 txdctl |= IGB_TX_HTHRESH << 8;
3137                 txdctl |= IGB_TX_WTHRESH << 16;
3138                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3139                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3140         }
3141
3142         /* Program the Transmit Control Register */
3143         tctl = E1000_READ_REG(hw, E1000_TCTL);
3144         tctl &= ~E1000_TCTL_CT;
3145         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3146                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3147
3148         e1000_config_collision_dist(hw);
3149
3150         /* This write will effectively turn on the transmit unit. */
3151         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3152 }
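/*
 * Illustrative only: the three TXDCTL threshold fields sit at bit
 * offsets 0, 8 and 16, which is what the shifts above implement.
 * Building the register value with hypothetical thresholds:
 */
#if 0
        u32 txdctl = 0;

        txdctl |= 8;            /* prefetch threshold (PTHRESH) */
        txdctl |= 1 << 8;       /* host threshold (HTHRESH) */
        txdctl |= 16 << 16;     /* write-back threshold (WTHRESH) */
        txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
#endif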
3153
3154 /*********************************************************************
3155  *
3156  *  Free all transmit rings.
3157  *
3158  **********************************************************************/
3159 static void
3160 igb_free_transmit_structures(struct adapter *adapter)
3161 {
3162         struct tx_ring *txr = adapter->tx_rings;
3163
3164         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3165                 IGB_TX_LOCK(txr);
3166                 igb_free_transmit_buffers(txr);
3167                 igb_dma_free(adapter, &txr->txdma);
3168                 IGB_TX_UNLOCK(txr);
3169                 IGB_TX_LOCK_DESTROY(txr);
3170         }
3171         free(adapter->tx_rings, M_DEVBUF);
3172 }
3173
3174 /*********************************************************************
3175  *
3176  *  Free transmit ring related data structures.
3177  *
3178  **********************************************************************/
3179 static void
3180 igb_free_transmit_buffers(struct tx_ring *txr)
3181 {
3182         struct adapter *adapter = txr->adapter;
3183         struct igb_tx_buffer *tx_buffer;
3184         int             i;
3185
3186         INIT_DEBUGOUT("free_transmit_ring: begin");
3187
3188         if (txr->tx_buffers == NULL)
3189                 return;
3190
3191         tx_buffer = txr->tx_buffers;
3192         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3193                 if (tx_buffer->m_head != NULL) {
3194                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3195                             BUS_DMASYNC_POSTWRITE);
3196                         bus_dmamap_unload(txr->txtag,
3197                             tx_buffer->map);
3198                         m_freem(tx_buffer->m_head);
3199                         tx_buffer->m_head = NULL;
3200                         if (tx_buffer->map != NULL) {
3201                                 bus_dmamap_destroy(txr->txtag,
3202                                     tx_buffer->map);
3203                                 tx_buffer->map = NULL;
3204                         }
3205                 } else if (tx_buffer->map != NULL) {
3206                         bus_dmamap_unload(txr->txtag,
3207                             tx_buffer->map);
3208                         bus_dmamap_destroy(txr->txtag,
3209                             tx_buffer->map);
3210                         tx_buffer->map = NULL;
3211                 }
3212         }
3213 #if __FreeBSD_version >= 800000
3214         if (txr->br != NULL)
3215                 buf_ring_free(txr->br, M_DEVBUF);
3216 #endif
3217         if (txr->tx_buffers != NULL) {
3218                 free(txr->tx_buffers, M_DEVBUF);
3219                 txr->tx_buffers = NULL;
3220         }
3221         if (txr->txtag != NULL) {
3222                 bus_dma_tag_destroy(txr->txtag);
3223                 txr->txtag = NULL;
3224         }
3225         return;
3226 }
3227
3228 /**********************************************************************
3229  *
3230  *  Setup work for hardware segmentation offload (TSO)
3231  *
3232  **********************************************************************/
3233 static boolean_t
3234 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3235 {
3236         struct adapter *adapter = txr->adapter;
3237         struct e1000_adv_tx_context_desc *TXD;
3238         struct igb_tx_buffer        *tx_buffer;
3239         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3240         u32 mss_l4len_idx = 0;
3241         u16 vtag = 0;
3242         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3243         struct ether_vlan_header *eh;
3244         struct ip *ip;
3245         struct tcphdr *th;
3246
3247
3248         /*
3249          * Determine where frame payload starts.
3250          * Jump over vlan headers if already present
3251          */
3252         eh = mtod(mp, struct ether_vlan_header *);
3253         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3254                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3255         else
3256                 ehdrlen = ETHER_HDR_LEN;
3257
3258         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3259         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3260                 return FALSE;
3261
3262         /* Only supports IPV4 for now */
3263         ctxd = txr->next_avail_desc;
3264         tx_buffer = &txr->tx_buffers[ctxd];
3265         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3266
3267         ip = (struct ip *)(mp->m_data + ehdrlen);
3268         if (ip->ip_p != IPPROTO_TCP)
3269                 return FALSE;
3270         ip->ip_sum = 0;
3271         ip_hlen = ip->ip_hl << 2;
3272         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3273         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3274             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3275         tcp_hlen = th->th_off << 2;
3276         /*
3277          * Calculate header length, this is used
3278          * in the transmit desc in igb_xmit
3279          */
3280         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3281
3282         /* VLAN MACLEN IPLEN */
3283         if (mp->m_flags & M_VLANTAG) {
3284                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3285                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3286         }
3287
3288         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3289         vlan_macip_lens |= ip_hlen;
3290         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3291
3292         /* ADV DTYPE TUCMD */
3293         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3294         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3295         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3296         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3297
3298         /* MSS L4LEN IDX */
3299         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3300         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3301         /* 82575 needs the queue index added */
3302         if (adapter->hw.mac.type == e1000_82575)
3303                 mss_l4len_idx |= txr->me << 4;
3304         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3305
3306         TXD->seqnum_seed = htole32(0);
3307         tx_buffer->m_head = NULL;
3308         tx_buffer->next_eop = -1;
3309
3310         if (++ctxd == adapter->num_tx_desc)
3311                 ctxd = 0;
3312
3313         txr->tx_avail--;
3314         txr->next_avail_desc = ctxd;
3315         return TRUE;
3316 }
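/*
 * Note (illustrative): for TSO the TCP checksum above is seeded with a
 * pseudo-header sum that deliberately omits the length, because the
 * hardware inserts each segment's own length. A normal software seed
 * would include it; sketched below with a hypothetical "tlen" holding
 * the TCP header plus payload length:
 */
#if 0
        th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
            htons((u_short)(IPPROTO_TCP + tlen)));
#endif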
3317
3318
3319 /*********************************************************************
3320  *
3321  *  Context Descriptor setup for VLAN or CSUM
3322  *
3323  **********************************************************************/
3324
3325 static bool
3326 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3327 {
3328         struct adapter *adapter = txr->adapter;
3329         struct e1000_adv_tx_context_desc *TXD;
3330         struct igb_tx_buffer        *tx_buffer;
3331         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3332         struct ether_vlan_header *eh;
3333         struct ip *ip = NULL;
3334         struct ip6_hdr *ip6;
3335         int  ehdrlen, ctxd, ip_hlen = 0;
3336         u16     etype, vtag = 0;
3337         u8      ipproto = 0;
3338         bool    offload = TRUE;
3339
3340         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3341                 offload = FALSE;
3342
3343         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3344         ctxd = txr->next_avail_desc;
3345         tx_buffer = &txr->tx_buffers[ctxd];
3346         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3347
3348         /*
3349         ** In advanced descriptors the vlan tag must 
3350         ** be placed into the context descriptor, thus
3351         ** we need to be here just for that setup.
3352         */
3353         if (mp->m_flags & M_VLANTAG) {
3354                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3355                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3356         } else if (offload == FALSE)
3357                 return FALSE;
3358
3359         /*
3360          * Determine where frame payload starts.
3361          * Jump over vlan headers if already present,
3362          * helpful for QinQ too.
3363          */
3364         eh = mtod(mp, struct ether_vlan_header *);
3365         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3366                 etype = ntohs(eh->evl_proto);
3367                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3368         } else {
3369                 etype = ntohs(eh->evl_encap_proto);
3370                 ehdrlen = ETHER_HDR_LEN;
3371         }
3372
3373         /* Set the ether header length */
3374         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3375
3376         switch (etype) {
3377                 case ETHERTYPE_IP:
3378                         ip = (struct ip *)(mp->m_data + ehdrlen);
3379                         ip_hlen = ip->ip_hl << 2;
3380                         if (mp->m_len < ehdrlen + ip_hlen) {
3381                                 offload = FALSE;
3382                                 break;
3383                         }
3384                         ipproto = ip->ip_p;
3385                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3386                         break;
3387                 case ETHERTYPE_IPV6:
3388                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3389                         ip_hlen = sizeof(struct ip6_hdr);
3390                         if (mp->m_len < ehdrlen + ip_hlen)
3391                                 return (FALSE);
3392                         ipproto = ip6->ip6_nxt;
3393                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3394                         break;
3395                 default:
3396                         offload = FALSE;
3397                         break;
3398         }
3399
3400         vlan_macip_lens |= ip_hlen;
3401         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3402
3403         switch (ipproto) {
3404                 case IPPROTO_TCP:
3405                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3406                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3407                         break;
3408                 case IPPROTO_UDP:
3409                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3410                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3411                         break;
3412 #if __FreeBSD_version >= 800000
3413                 case IPPROTO_SCTP:
3414                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3415                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3416                         break;
3417 #endif
3418                 default:
3419                         offload = FALSE;
3420                         break;
3421         }
3422
3423         /* 82575 needs the queue index added */
3424         if (adapter->hw.mac.type == e1000_82575)
3425                 mss_l4len_idx = txr->me << 4;
3426
3427         /* Now copy bits into descriptor */
3428         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3429         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3430         TXD->seqnum_seed = htole32(0);
3431         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3432
3433         tx_buffer->m_head = NULL;
3434         tx_buffer->next_eop = -1;
3435
3436         /* We've consumed the first desc, adjust counters */
3437         if (++ctxd == adapter->num_tx_desc)
3438                 ctxd = 0;
3439         txr->next_avail_desc = ctxd;
3440         --txr->tx_avail;
3441
3442         return (offload);
3443 }
3444
3445
3446 /**********************************************************************
3447  *
3448  *  Examine each tx_buffer in the used queue. If the hardware is done
3449  *  processing the packet then free associated resources. The
3450  *  tx_buffer is put back on the free queue.
3451  *
3452  *  TRUE means there is work in the ring to clean; FALSE means it is empty.
3453  **********************************************************************/
3454 static bool
3455 igb_txeof(struct tx_ring *txr)
3456 {
3457         struct adapter  *adapter = txr->adapter;
3458         int first, last, done;
3459         struct igb_tx_buffer *tx_buffer;
3460         struct e1000_tx_desc   *tx_desc, *eop_desc;
3461         struct ifnet   *ifp = adapter->ifp;
3462
3463         IGB_TX_LOCK_ASSERT(txr);
3464
3465         if (txr->tx_avail == adapter->num_tx_desc)
3466                 return FALSE;
3467
3468         first = txr->next_to_clean;
3469         tx_desc = &txr->tx_base[first];
3470         tx_buffer = &txr->tx_buffers[first];
3471         last = tx_buffer->next_eop;
3472         eop_desc = &txr->tx_base[last];
3473
3474         /*
3475          * Get the index of the first descriptor
3476          * AFTER the EOP of the first packet, so the
3477          * inner while loop can use a simple
3478          * comparison.
3479          */
3480         if (++last == adapter->num_tx_desc)
3481                 last = 0;
3482         done = last;
3483
3484         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3485             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3486
3487         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3488                 /* We clean the range of the packet */
3489                 while (first != done) {
3490                         tx_desc->upper.data = 0;
3491                         tx_desc->lower.data = 0;
3492                         tx_desc->buffer_addr = 0;
3493                         ++txr->tx_avail;
3494
3495                         if (tx_buffer->m_head) {
3496                                 txr->bytes +=
3497                                     tx_buffer->m_head->m_pkthdr.len;
3498                                 bus_dmamap_sync(txr->txtag,
3499                                     tx_buffer->map,
3500                                     BUS_DMASYNC_POSTWRITE);
3501                                 bus_dmamap_unload(txr->txtag,
3502                                     tx_buffer->map);
3503
3504                                 m_freem(tx_buffer->m_head);
3505                                 tx_buffer->m_head = NULL;
3506                         }
3507                         tx_buffer->next_eop = -1;
3508                         txr->watchdog_time = ticks;
3509
3510                         if (++first == adapter->num_tx_desc)
3511                                 first = 0;
3512
3513                         tx_buffer = &txr->tx_buffers[first];
3514                         tx_desc = &txr->tx_base[first];
3515                 }
3516                 ++txr->packets;
3517                 ++ifp->if_opackets;
3518                 /* See if we can continue to the next packet */
3519                 last = tx_buffer->next_eop;
3520                 if (last != -1) {
3521                         eop_desc = &txr->tx_base[last];
3522                         /* Get new done point */
3523                         if (++last == adapter->num_tx_desc) last = 0;
3524                         done = last;
3525                 } else
3526                         break;
3527         }
3528         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3529             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3530
3531         txr->next_to_clean = first;
3532
3533         /*
3534          * If we have enough room, clear IFF_DRV_OACTIVE
3535          * to tell the stack that it is OK to send packets.
3536          */
3537         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {                
3538                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3539                 /* All clean, turn off the watchdog */
3540                 if (txr->tx_avail == adapter->num_tx_desc) {
3541                         txr->watchdog_check = FALSE;
3542                         return FALSE;
3543                 }
3544         }
3545
3546         return (TRUE);
3547 }
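/*
 * Illustrative only: indices into the descriptor rings above wrap with
 * a compare-and-reset rather than a modulo, avoiding a division in the
 * hot path ("index" is hypothetical):
 */
#if 0
        if (++index == adapter->num_tx_desc)
                index = 0;      /* same as (index + 1) % num_tx_desc */
#endif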
3548
3549
3550 /*********************************************************************
3551  *
3552  *  Refresh mbuf buffers for RX descriptor rings
3553  *   - keeps its own state, so discards due to resource
3554  *     exhaustion are unnecessary: if an mbuf cannot be obtained
3555  *     the routine just returns, keeping its placeholder, and can
3556  *     simply be called again later to retry.
3557  *
3558  **********************************************************************/
3559 static void
3560 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3561 {
3562         struct adapter          *adapter = rxr->adapter;
3563         bus_dma_segment_t       hseg[1];
3564         bus_dma_segment_t       pseg[1];
3565         struct igb_rx_buf       *rxbuf;
3566         struct mbuf             *mh, *mp;
3567         int                     i, nsegs, error, cleaned;
3568
3569         i = rxr->next_to_refresh;
3570         cleaned = -1; /* Signify no completions */
3571         while (i != limit) {
3572                 rxbuf = &rxr->rx_buffers[i];
3573                 if (rxbuf->m_head == NULL) {
3574                         mh = m_gethdr(M_DONTWAIT, MT_DATA);
3575                         if (mh == NULL)
3576                                 goto update;
3577                         mh->m_pkthdr.len = mh->m_len = MHLEN;
3578                         mh->m_flags |= M_PKTHDR;
3580                         m_adj(mh, ETHER_ALIGN);
3581                         /* Get the memory mapping */
3582                         error = bus_dmamap_load_mbuf_sg(rxr->htag,
3583                             rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3584                         if (error != 0) {
3585                                 printf("GET BUF: dmamap load"
3586                                     " failure - %d\n", error);
3587                                 m_free(mh);
3588                                 goto update;
3589                         }
3590                         rxbuf->m_head = mh;
3591                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3592                             BUS_DMASYNC_PREREAD);
3593                         rxr->rx_base[i].read.hdr_addr =
3594                             htole64(hseg[0].ds_addr);
3595                 }
3596
3597                 if (rxbuf->m_pack == NULL) {
3598                         mp = m_getjcl(M_DONTWAIT, MT_DATA,
3599                             M_PKTHDR, adapter->rx_mbuf_sz);
3600                         if (mp == NULL)
3601                                 goto update;
3602                         mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3603                         /* Get the memory mapping */
3604                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3605                             rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3606                         if (error != 0) {
3607                                 printf("GET BUF: dmamap load"
3608                                     " failure - %d\n", error);
3609                                 m_free(mp);
3610                                 goto update;
3611                         }
3612                         rxbuf->m_pack = mp;
3613                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3614                             BUS_DMASYNC_PREREAD);
3615                         rxr->rx_base[i].read.pkt_addr =
3616                             htole64(pseg[0].ds_addr);
3617                 }
3618
3619                 cleaned = i;
3620                 /* Calculate next index */
3621                 if (++i == adapter->num_rx_desc)
3622                         i = 0;
3623                 /* This is the work marker for refresh */
3624                 rxr->next_to_refresh = i;
3625         }
3626 update:
3627         if (cleaned != -1) /* If we refreshed some, bump tail */
3628                 E1000_WRITE_REG(&adapter->hw,
3629                     E1000_RDT(rxr->me), cleaned);
3630         return;
3631 }
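/*
 * Illustrative only: because igb_refresh_mbufs() keeps its own
 * next_to_refresh marker, the RX cleaner can simply invoke it again on
 * a later pass if an mbuf allocation failed. A sketch of a typical
 * call site ("i" and "processed" are hypothetical cleaner state):
 */
#if 0
        if (processed != 0) {
                igb_refresh_mbufs(rxr, i);
                processed = 0;
        }
#endif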
3632
3633
3634 /*********************************************************************
3635  *
3636  *  Allocate memory for rx_buffer structures. Since we use one
3637  *  rx_buffer per received packet, the maximum number of rx_buffer's
3638  *  that we'll need is equal to the number of receive descriptors
3639  *  that we've allocated.
3640  *
3641  **********************************************************************/
3642 static int
3643 igb_allocate_receive_buffers(struct rx_ring *rxr)
3644 {
3645         struct  adapter         *adapter = rxr->adapter;
3646         device_t                dev = adapter->dev;
3647         struct igb_rx_buf       *rxbuf;
3648         int                     i, bsize, error;
3649
3650         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3651         if (!(rxr->rx_buffers =
3652             (struct igb_rx_buf *) malloc(bsize,
3653             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3654                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3655                 error = ENOMEM;
3656                 goto fail;
3657         }
3658
3659         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3660                                    1, 0,                /* alignment, bounds */
3661                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3662                                    BUS_SPACE_MAXADDR,   /* highaddr */
3663                                    NULL, NULL,          /* filter, filterarg */
3664                                    MSIZE,               /* maxsize */
3665                                    1,                   /* nsegments */
3666                                    MSIZE,               /* maxsegsize */
3667                                    0,                   /* flags */
3668                                    NULL,                /* lockfunc */
3669                                    NULL,                /* lockfuncarg */
3670                                    &rxr->htag))) {
3671                 device_printf(dev, "Unable to create RX DMA tag\n");
3672                 goto fail;
3673         }
3674
3675         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3676                                    1, 0,                /* alignment, bounds */
3677                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3678                                    BUS_SPACE_MAXADDR,   /* highaddr */
3679                                    NULL, NULL,          /* filter, filterarg */
3680                                    MJUMPAGESIZE,        /* maxsize */
3681                                    1,                   /* nsegments */
3682                                    MJUMPAGESIZE,        /* maxsegsize */
3683                                    0,                   /* flags */
3684                                    NULL,                /* lockfunc */
3685                                    NULL,                /* lockfuncarg */
3686                                    &rxr->ptag))) {
3687                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3688                 goto fail;
3689         }
3690
3691         for (i = 0; i < adapter->num_rx_desc; i++) {
3692                 rxbuf = &rxr->rx_buffers[i];
3693                 error = bus_dmamap_create(rxr->htag,
3694                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3695                 if (error) {
3696                         device_printf(dev,
3697                             "Unable to create RX head DMA maps\n");
3698                         goto fail;
3699                 }
3700                 error = bus_dmamap_create(rxr->ptag,
3701                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3702                 if (error) {
3703                         device_printf(dev,
3704                             "Unable to create RX packet DMA maps\n");
3705                         goto fail;
3706                 }
3707         }
3708
3709         return (0);
3710
3711 fail:
3712         /* Frees all, but can handle partial completion */
3713         igb_free_receive_structures(adapter);
3714         return (error);
3715 }
3716
3717
3718 static void
3719 igb_free_receive_ring(struct rx_ring *rxr)
3720 {
3721         struct  adapter         *adapter;
3722         struct igb_rx_buf       *rxbuf;
3723         int i;
3724
3725         adapter = rxr->adapter;
3726         for (i = 0; i < adapter->num_rx_desc; i++) {
3727                 rxbuf = &rxr->rx_buffers[i];
3728                 if (rxbuf->m_head != NULL) {
3729                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3730                             BUS_DMASYNC_POSTREAD);
3731                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3732                         rxbuf->m_head->m_flags |= M_PKTHDR;
3733                         m_freem(rxbuf->m_head);
3734                 }
3735                 if (rxbuf->m_pack != NULL) {
3736                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3737                             BUS_DMASYNC_POSTREAD);
3738                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3739                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3740                         m_freem(rxbuf->m_pack);
3741                 }
3742                 rxbuf->m_head = NULL;
3743                 rxbuf->m_pack = NULL;
3744         }
3745 }
3746
3747
3748 /*********************************************************************
3749  *
3750  *  Initialize a receive ring and its buffers.
3751  *
3752  **********************************************************************/
3753 static int
3754 igb_setup_receive_ring(struct rx_ring *rxr)
3755 {
3756         struct  adapter         *adapter;
3757         struct  ifnet           *ifp;
3758         device_t                dev;
3759         struct igb_rx_buf       *rxbuf;
3760         bus_dma_segment_t       pseg[1], hseg[1];
3761         struct lro_ctrl         *lro = &rxr->lro;
3762         int                     rsize, nsegs, error = 0;
3763
3764         adapter = rxr->adapter;
3765         dev = adapter->dev;
3766         ifp = adapter->ifp;
3767
3768         /* Clear the ring contents */
3769         IGB_RX_LOCK(rxr);
3770         rsize = roundup2(adapter->num_rx_desc *
3771             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3772         bzero((void *)rxr->rx_base, rsize);
3773
3774         /*
3775         ** Free current RX buffer structures and their mbufs
3776         */
3777         igb_free_receive_ring(rxr);
3778
3779         /* Now replenish the ring mbufs */
3780         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3781                 struct mbuf     *mh, *mp;
3782
3783                 rxbuf = &rxr->rx_buffers[j];
3784
3785                 /* First the header */
3786                 rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3787                 if (rxbuf->m_head == NULL)
3788                         goto fail;
3789                 mh = rxbuf->m_head;
3790                 mh->m_len = mh->m_pkthdr.len = MHLEN;
3791                 mh->m_flags |= M_PKTHDR;
3792                 m_adj(mh, ETHER_ALIGN);
3793                 /* Get the memory mapping */
3794                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
3795                     rxbuf->hmap, rxbuf->m_head, hseg,
3796                     &nsegs, BUS_DMA_NOWAIT);
3797                 if (error != 0) /* Nothing elegant to do here */
3798                         goto fail;
3799                 bus_dmamap_sync(rxr->htag,
3800                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
3801                 /* Update descriptor */
3802                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3803
3804                 /* Now the payload cluster */
3805                 rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3806                     M_PKTHDR, adapter->rx_mbuf_sz);
3807                 if (rxbuf->m_pack == NULL)
3808                         goto fail;
3809                 mp = rxbuf->m_pack;
3810                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3811                 /* Get the memory mapping */
3812                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3813                     rxbuf->pmap, mp, pseg,
3814                     &nsegs, BUS_DMA_NOWAIT);
3815                 if (error != 0)
3816                         goto fail;
3817                 bus_dmamap_sync(rxr->ptag,
3818                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
3819                 /* Update descriptor */
3820                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3821         }
3822
3823         /* Setup our descriptor indices */
3824         rxr->next_to_check = 0;
3825         rxr->next_to_refresh = 0;
3826         rxr->lro_enabled = FALSE;
3827
3828         if (igb_header_split)
3829                 rxr->hdr_split = TRUE;
3830         else
3831                 ifp->if_capabilities &= ~IFCAP_LRO;
3832
3833         rxr->fmp = NULL;
3834         rxr->lmp = NULL;
3835         rxr->discard = FALSE;
3836
3837         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3838             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3839
3840         /*
3841         ** Now set up the LRO interface. Header split
3842         ** is only used together with LRO, since on
3843         ** its own it is rarely desirable in these
3844         ** setups.
3845         */
3846         if (ifp->if_capenable & IFCAP_LRO) {
3847                 int err = tcp_lro_init(lro);
3848                 if (err) {
3849                         device_printf(dev, "LRO Initialization failed!\n");
3850                         goto fail;
3851                 }
3852                 INIT_DEBUGOUT("RX LRO Initialized\n");
3853                 rxr->lro_enabled = TRUE;
3854                 lro->ifp = adapter->ifp;
3855         }
3856
3857         IGB_RX_UNLOCK(rxr);
3858         return (0);
3859
3860 fail:
3861         igb_free_receive_ring(rxr);
3862         IGB_RX_UNLOCK(rxr);
3863         return (error);
3864 }
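/*
 * Illustrative only: in its "read" (software-to-hardware) format the
 * advanced RX descriptor carries two buffer addresses, which is what
 * makes the header/payload split above possible. The bus addresses
 * below are hypothetical:
 */
#if 0
        union e1000_adv_rx_desc *rxd = &rxr->rx_base[0];

        rxd->read.hdr_addr = htole64(header_busaddr);
        rxd->read.pkt_addr = htole64(payload_busaddr);
#endif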
3865
3866 /*********************************************************************
3867  *
3868  *  Initialize all receive rings.
3869  *
3870  **********************************************************************/
3871 static int
3872 igb_setup_receive_structures(struct adapter *adapter)
3873 {
3874         struct rx_ring *rxr = adapter->rx_rings;
3875         int i, j;
3876
3877         for (i = 0; i < adapter->num_queues; i++, rxr++)
3878                 if (igb_setup_receive_ring(rxr))
3879                         goto fail;
3880
3881         return (0);
3882 fail:
3883         /*
3884          * Free the RX buffers of the rings that completed; the
3885          * failing ring (index 'i') has already cleaned up after
3886          * itself, so only rings 0 .. i-1 need attention here.
3887          */
3888         rxr = adapter->rx_rings;
3889         for (j = 0; j < i; j++, rxr++)
3890                 igb_free_receive_ring(rxr);
3894
3895         return (ENOBUFS);
3896 }
3897
3898 /*********************************************************************
3899  *
3900  *  Enable receive unit.
3901  *
3902  **********************************************************************/
3903 static void
3904 igb_initialize_receive_units(struct adapter *adapter)
3905 {
3906         struct rx_ring  *rxr = adapter->rx_rings;
3907         struct ifnet    *ifp = adapter->ifp;
3908         struct e1000_hw *hw = &adapter->hw;
3909         u32             rctl, rxcsum, psize, srrctl = 0;
3910
3911         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3912
3913         /*
3914          * Make sure receives are disabled while setting
3915          * up the descriptor ring
3916          */
3917         rctl = E1000_READ_REG(hw, E1000_RCTL);
3918         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3919
3920         /*
3921         ** Set up for header split
3922         */
3923         if (rxr->hdr_split) {
3924                 /* Use a standard mbuf for the header */
3925                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3926                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3927         } else
3928                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3929
3930         /*
3931         ** Set up for jumbo frames
3932         */
3933         if (ifp->if_mtu > ETHERMTU) {
3934                 rctl |= E1000_RCTL_LPE;
3935                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3936                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3937
3938                 /* Set maximum packet len */
3939                 psize = adapter->max_frame_size;
3940                 /* are we on a vlan? */
3941                 if (adapter->ifp->if_vlantrunk != NULL)
3942                         psize += VLAN_TAG_SIZE;
3943                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3944         } else {
3945                 rctl &= ~E1000_RCTL_LPE;
3946                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3947                 rctl |= E1000_RCTL_SZ_2048;
3948         }
3949
3950         /* Setup the Base and Length of the Rx Descriptor Rings */
3951         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3952                 u64 bus_addr = rxr->rxdma.dma_paddr;
3953                 u32 rxdctl;
3954
3955                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3956                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3957                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3958                     (uint32_t)(bus_addr >> 32));
3959                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3960                     (uint32_t)bus_addr);
3961                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3962                 /* Enable this Queue */
3963                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3964                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3965                 rxdctl &= 0xFFF00000;
3966                 rxdctl |= IGB_RX_PTHRESH;
3967                 rxdctl |= IGB_RX_HTHRESH << 8;
3968                 rxdctl |= IGB_RX_WTHRESH << 16;
3969                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3970         }
3971
3972         /*
3973         ** Setup for RX MultiQueue
3974         */
3975         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3976         if (adapter->num_queues > 1) {
3977                 u32 random[10], mrqc, shift = 0;
3978                 union igb_reta {
3979                         u32 dword;
3980                         u8  bytes[4];
3981                 } reta;
3982
3983                 arc4rand(&random, sizeof(random), 0);
3984                 if (adapter->hw.mac.type == e1000_82575)
3985                         shift = 6;
3986                 /* Populate the 128-entry redirection table (RETA) */
3987                 for (int i = 0; i < 128; i++) {
3988                         reta.bytes[i & 3] =
3989                             (i % adapter->num_queues) << shift;
3990                         if ((i & 3) == 3)
3991                                 E1000_WRITE_REG(hw,
3992                                     E1000_RETA(i >> 2), reta.dword);
3993                 }
3994                 /* Now fill in hash table */
3995                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3996                 for (int i = 0; i < 10; i++)
3997                         E1000_WRITE_REG_ARRAY(hw,
3998                             E1000_RSSRK(0), i, random[i]);
3999
4000                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4001                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4002                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4003                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4004                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4005                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4006                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4007                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4008
4009                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4010
4011                 /*
4012                 ** NOTE: Receive Full-Packet Checksum Offload
4013                 ** is mutually exclusive with Multiqueue; this
4014                 ** does not affect TCP/IP checksum offload,
4015                 ** which continues to work.
4016                 */
4017                 rxcsum |= E1000_RXCSUM_PCSD;
4018 #if __FreeBSD_version >= 800000
4019                 /* For SCTP Offload */
4020                 if ((hw->mac.type == e1000_82576)
4021                     && (ifp->if_capenable & IFCAP_RXCSUM))
4022                         rxcsum |= E1000_RXCSUM_CRCOFL;
4023 #endif
4024         } else {
4025                 /* Non RSS setup */
4026                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4027                         rxcsum |= E1000_RXCSUM_IPPCSE;
4028 #if __FreeBSD_version >= 800000
4029                         if (adapter->hw.mac.type == e1000_82576)
4030                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4031 #endif
4032                 } else
4033                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4034         }
4035         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4036
4037         /* Setup the Receive Control Register */
4038         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4039         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4040                    E1000_RCTL_RDMTS_HALF |
4041                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4042         /* Strip CRC bytes. */
4043         rctl |= E1000_RCTL_SECRC;
4044         /* Make sure VLAN Filters are off */
4045         rctl &= ~E1000_RCTL_VFE;
4046         /* Don't store bad packets */
4047         rctl &= ~E1000_RCTL_SBP;
4048
4049         /* Enable Receives */
4050         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4051
4052         /*
4053          * Setup the HW Rx Head and Tail Descriptor Pointers
4054          *   - needs to be after enable
4055          */
4056         for (int i = 0; i < adapter->num_queues; i++) {
4057                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4058                 E1000_WRITE_REG(hw, E1000_RDT(i),
4059                      adapter->num_rx_desc - 1);
4060         }
4061         return;
4062 }
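/*
 * Illustrative only: the 128-entry RSS redirection table is written
 * four 8-bit entries per 32-bit RETA register. With two queues and no
 * shift, the first register would spread flows 0,1,0,1 (the union
 * mirrors the local "igb_reta" declared in the function above):
 */
#if 0
        reta.bytes[0] = 0;
        reta.bytes[1] = 1;
        reta.bytes[2] = 0;
        reta.bytes[3] = 1;
        E1000_WRITE_REG(hw, E1000_RETA(0), reta.dword);
#endif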
4063
4064 /*********************************************************************
4065  *
4066  *  Free receive rings.
4067  *
4068  **********************************************************************/
4069 static void
4070 igb_free_receive_structures(struct adapter *adapter)
4071 {
4072         struct rx_ring *rxr = adapter->rx_rings;
4073
4074         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4075                 struct lro_ctrl *lro = &rxr->lro;
4076                 igb_free_receive_buffers(rxr);
4077                 tcp_lro_free(lro);
4078                 igb_dma_free(adapter, &rxr->rxdma);
4079         }
4080
4081         free(adapter->rx_rings, M_DEVBUF);
4082 }
4083
4084 /*********************************************************************
4085  *
4086  *  Free receive ring data structures.
4087  *
4088  **********************************************************************/
4089 static void
4090 igb_free_receive_buffers(struct rx_ring *rxr)
4091 {
4092         struct adapter          *adapter = rxr->adapter;
4093         struct igb_rx_buf       *rxbuf;
4094         int i;
4095
4096         INIT_DEBUGOUT("free_receive_structures: begin");
4097
4098         /* Cleanup any existing buffers */
4099         if (rxr->rx_buffers != NULL) {
4100                 for (i = 0; i < adapter->num_rx_desc; i++) {
4101                         rxbuf = &rxr->rx_buffers[i];
4102                         if (rxbuf->m_head != NULL) {
4103                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4104                                     BUS_DMASYNC_POSTREAD);
4105                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4106                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4107                                 m_freem(rxbuf->m_head);
4108                         }
4109                         if (rxbuf->m_pack != NULL) {
4110                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4111                                     BUS_DMASYNC_POSTREAD);
4112                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4113                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4114                                 m_freem(rxbuf->m_pack);
4115                         }
4116                         rxbuf->m_head = NULL;
4117                         rxbuf->m_pack = NULL;
4118                         if (rxbuf->hmap != NULL) {
4119                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4120                                 rxbuf->hmap = NULL;
4121                         }
4122                         if (rxbuf->pmap != NULL) {
4123                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4124                                 rxbuf->pmap = NULL;
4125                         }
4126                 }
4127                 if (rxr->rx_buffers != NULL) {
4128                         free(rxr->rx_buffers, M_DEVBUF);
4129                         rxr->rx_buffers = NULL;
4130                 }
4131         }
4132
4133         if (rxr->htag != NULL) {
4134                 bus_dma_tag_destroy(rxr->htag);
4135                 rxr->htag = NULL;
4136         }
4137         if (rxr->ptag != NULL) {
4138                 bus_dma_tag_destroy(rxr->ptag);
4139                 rxr->ptag = NULL;
4140         }
4141 }
4142
4143 static __inline void
4144 igb_rx_discard(struct rx_ring *rxr, int i)
4145 {
4146         struct adapter          *adapter = rxr->adapter;
4147         struct igb_rx_buf       *rbuf;
4148         struct mbuf             *mh, *mp;
4149
4150         rbuf = &rxr->rx_buffers[i];
4151         if (rxr->fmp != NULL) {
4152                 rxr->fmp->m_flags |= M_PKTHDR;
4153                 m_freem(rxr->fmp);
4154                 rxr->fmp = NULL;
4155                 rxr->lmp = NULL;
4156         }
4157
4158         mh = rbuf->m_head;
4159         mp = rbuf->m_pack;
4160
4161         /* Reuse loaded DMA map and just update mbuf chain */
4162         mh->m_len = MHLEN;
4163         mh->m_flags |= M_PKTHDR;
4164         mh->m_next = NULL;
4165
4166         mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4167         mp->m_data = mp->m_ext.ext_buf;
4168         mp->m_next = NULL;
4169         return;
4170 }
4171
4172 static __inline void
4173 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4174 {
4175
4176         /*
4177          * At the moment LRO is only applied to IPv4/TCP packets whose
4178          * TCP checksum has been verified by hardware, and which carry
4179          * no VLAN tag in the Ethernet header.
4180          */
4181         if (rxr->lro_enabled &&
4182             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4183             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4184             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4185             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4186             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4187             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4188                 /*
4189                  * Send to the stack if:
4190                  *  - LRO is not enabled, or
4191                  *  - no LRO resources are available, or
4192                  *  - the LRO enqueue fails.
4193                  */
4194                 if (rxr->lro.lro_cnt != 0)
4195                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4196                                 return;
4197         }
4198         (*ifp->if_input)(ifp, m);
4199 }
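/*
** Worked example of the LRO gate above (illustrative, not driver
** code): a plain IPv4/TCP frame reports both packet-type bits in
** the advanced descriptor, so
**
**	ptype = E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP;
**	(ptype & (IPV4|TCP)) == (IPV4|TCP)	-> eligible for LRO
**
** whereas a UDP frame, or one matched by an ETQF filter, fails the
** mask and is handed straight to if_input().
*/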
4200
4201 /*********************************************************************
4202  *
4203  *  This routine executes in interrupt context. It replenishes
4204  *  the mbufs in the descriptor ring and passes data which has
4205  *  been DMA'd into host memory up to the upper layer.
4206  *
4207  *  We loop at most count times if count is > 0, or until done if
4208  *  count < 0.
4209  *
4210  *  Return TRUE if more to clean, FALSE otherwise
4211  *********************************************************************/
4212 static bool
4213 igb_rxeof(struct igb_queue *que, int count)
4214 {
4215         struct adapter          *adapter = que->adapter;
4216         struct rx_ring          *rxr = que->rxr;
4217         struct ifnet            *ifp = adapter->ifp;
4218         struct lro_ctrl         *lro = &rxr->lro;
4219         struct lro_entry        *queued;
4220         int                     i, processed = 0;
4221         u32                     ptype, staterr = 0;
4222         union e1000_adv_rx_desc *cur;
4223
4224         IGB_RX_LOCK(rxr);
4225         /* Sync the ring. */
4226         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4227             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4228
4229         /* Main clean loop */
4230         for (i = rxr->next_to_check; count != 0;) {
4231                 struct mbuf             *sendmp, *mh, *mp;
4232                 struct igb_rx_buf       *rxbuf;
4233                 u16                     hlen, plen, hdr, vtag;
4234                 bool                    eop = FALSE;
4235  
4236                 cur = &rxr->rx_base[i];
4237                 staterr = le32toh(cur->wb.upper.status_error);
4238                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4239                         break;
4240                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4241                         break;
4242                 count--;
4243                 sendmp = mh = mp = NULL;
4244                 cur->wb.upper.status_error = 0;
4245                 rxbuf = &rxr->rx_buffers[i];
4246                 plen = le16toh(cur->wb.upper.length);
4247                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4248                 vtag = le16toh(cur->wb.upper.vlan);
4249                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4250                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4251
4252                 /* Make sure all segments of a bad packet are discarded */
4253                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4254                     (rxr->discard)) {
4255                         ifp->if_ierrors++;
4256                         ++rxr->rx_discarded;
4257                         if (!eop) /* Catch subsequent segs */
4258                                 rxr->discard = TRUE;
4259                         else
4260                                 rxr->discard = FALSE;
4261                         igb_rx_discard(rxr, i);
4262                         goto next_desc;
4263                 }
4264
4265                 /*
4266                 ** The way the hardware is configured to
4267                 ** split, it will ONLY use the header buffer
4268                 ** when header split is enabled; otherwise we
4269                 ** get normal behavior, i.e., both header and
4270                 ** payload are DMA'd into the payload buffer.
4271                 **
4272                 ** The fmp test catches the case where a
4273                 ** packet spans multiple descriptors; in that
4274                 ** case only the first header is valid.
4275                 */
4276                 if (rxr->hdr_split && rxr->fmp == NULL) {
4277                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4278                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4279                         if (hlen > IGB_HDR_BUF)
4280                                 hlen = IGB_HDR_BUF;
4281                         /* Handle the header mbuf */
4282                         mh = rxr->rx_buffers[i].m_head;
4283                         mh->m_len = hlen;
4284                         /* clear buf info for refresh */
4285                         rxbuf->m_head = NULL;
4286                         /*
4287                         ** Get the payload length; this
4288                         ** could be zero if it's a small
4289                         ** packet.
4290                         */
4291                         if (plen > 0) {
4292                                 mp = rxr->rx_buffers[i].m_pack;
4293                                 mp->m_len = plen;
4294                                 mh->m_next = mp;
4295                                 /* clear buf info for refresh */
4296                                 rxbuf->m_pack = NULL;
4297                                 rxr->rx_split_packets++;
4298                         }
4299                 } else {
4300                         /*
4301                         ** Either no header split, or a
4302                         ** secondary piece of a fragmented
4303                         ** split packet.
4304                         */
4305                         mh = rxr->rx_buffers[i].m_pack;
4306                         mh->m_len = plen;
4307                         /* clear buf info for refresh */
4308                         rxbuf->m_pack = NULL;
4309                 }
4310
4311                 ++processed; /* So we know when to refresh */
4312
4313                 /* Initial frame - setup */
4314                 if (rxr->fmp == NULL) {
4315                         mh->m_pkthdr.len = mh->m_len;
4316                         /* Store the first mbuf */
4317                         rxr->fmp = mh;
4318                         rxr->lmp = mh;
4319                         if (mp != NULL) {
4320                                 /* Add payload if split */
4321                                 mh->m_pkthdr.len += mp->m_len;
4322                                 rxr->lmp = mh->m_next;
4323                         }
4324                 } else {
4325                         /* Chain mbuf's together */
4326                         rxr->lmp->m_next = mh;
4327                         rxr->lmp = rxr->lmp->m_next;
4328                         rxr->fmp->m_pkthdr.len += mh->m_len;
4329                 }
4330
4331                 if (eop) {
4332                         rxr->fmp->m_pkthdr.rcvif = ifp;
4333                         ifp->if_ipackets++;
4334                         rxr->rx_packets++;
4335                         /* capture data for AIM */
4336                         rxr->packets++;
4337                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4338                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4339
4340                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4341                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4342
4343                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4344                             (staterr & E1000_RXD_STAT_VP) != 0) {
4345                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4346                                 rxr->fmp->m_flags |= M_VLANTAG;
4347                         }
4348 #if __FreeBSD_version >= 800000
4349                         rxr->fmp->m_pkthdr.flowid = que->msix;
4350                         rxr->fmp->m_flags |= M_FLOWID;
4351 #endif
4352                         sendmp = rxr->fmp;
4353                         /* Make sure to set M_PKTHDR. */
4354                         sendmp->m_flags |= M_PKTHDR;
4355                         rxr->fmp = NULL;
4356                         rxr->lmp = NULL;
4357                 }
4358
4359 next_desc:
4360                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4361                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4362
4363                 /* Advance our pointers to the next descriptor. */
4364                 if (++i == adapter->num_rx_desc)
4365                         i = 0;
4366                 /*
4367                 ** Send to the stack or LRO
4368                 */
4369                 if (sendmp != NULL)
4370                         igb_rx_input(rxr, ifp, sendmp, ptype);
4371
4372                 /* Every 8 descriptors we go to refresh mbufs */
4373                 if (processed == 8) {
4374                         igb_refresh_mbufs(rxr, i);
4375                         processed = 0;
4376                 }
4377         }
4378
4379         /* Catch any remainders */
4380         if (processed != 0) {
4381                 igb_refresh_mbufs(rxr, i);
4382                 processed = 0;
4383         }
4384
4385         rxr->next_to_check = i;
4386
4387         /*
4388          * Flush any outstanding LRO work
4389          */
4390         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4391                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4392                 tcp_lro_flush(lro, queued);
4393         }
4394
4395         IGB_RX_UNLOCK(rxr);
4396
4397         /*
4398         ** Is there still cleaning to do?
4399         ** If so, tell the caller so we get rescheduled.
4400         */
4401         if ((staterr & E1000_RXD_STAT_DD) != 0)
4402                 return (TRUE);
4403
4404         return (FALSE);
4405 }
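/*
** Illustrative sketch of how the TRUE/FALSE return is typically
** consumed (not a quote of this driver; the task and field names
** follow if_igb.h conventions but are assumptions here):
**
**	static void
**	igb_handle_que_sketch(void *context, int pending)
**	{
**		struct igb_queue *que = context;
**
**		/* More descriptors left? Requeue ourselves. */
**		if (igb_rxeof(que, que->adapter->rx_process_limit))
**			taskqueue_enqueue(que->tq, &que->que_task);
**	}
*/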
4406
4407 /*********************************************************************
4408  *
4409  *  Verify that the hardware indicated that the checksum is valid.
4410  *  Inform the stack about the status of the checksum so that it
4411  *  doesn't spend time verifying it again.
4412  *
4413  *********************************************************************/
4414 static void
4415 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4416 {
4417         u16 status = (u16)staterr;
4418         u8  errors = (u8) (staterr >> 24);
4419         int sctp;
4420
4421         /* The Ignore Checksum bit is set; report nothing */
4422         if (status & E1000_RXD_STAT_IXSM) {
4423                 mp->m_pkthdr.csum_flags = 0;
4424                 return;
4425         }
4426
4427         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4428             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4429                 sctp = 1;
4430         else
4431                 sctp = 0;
4432         if (status & E1000_RXD_STAT_IPCS) {
4433                 /* Did it pass? */
4434                 if (!(errors & E1000_RXD_ERR_IPE)) {
4435                         /* IP Checksum Good */
4436                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4437                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4438                 } else
4439                         mp->m_pkthdr.csum_flags = 0;
4440         }
4441
4442         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4443                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4444 #if __FreeBSD_version >= 800000
4445                 if (sctp) /* reassign */
4446                         type = CSUM_SCTP_VALID;
4447 #endif
4448                 /* Did it pass? */
4449                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4450                         mp->m_pkthdr.csum_flags |= type;
4451                         if (sctp == 0)
4452                                 mp->m_pkthdr.csum_data = htons(0xffff);
4453                 }
4454         }
4455         return;
4456 }
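/*
** What an upper layer sees after the routine above runs (a sketch,
** assuming a TCP packet whose hardware checksum passed):
**
**	flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
**	if ((m->m_pkthdr.csum_flags & flags) == flags &&
**	    m->m_pkthdr.csum_data == 0xffff)
**		;	/* stack skips software checksum verification */
**
** csum_data is set to 0xffff so the pseudo-header arithmetic in
** the TCP input path folds to "already verified".
*/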
4457
4458 /*
4459  * This routine is run via a vlan
4460  * config EVENT
4461  */
4462 static void
4463 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4464 {
4465         struct adapter  *adapter = ifp->if_softc;
4466         u32             index, bit;
4467
4468         if (ifp->if_softc !=  arg)   /* Not our event */
4469                 return;
4470
4471         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4472                 return;
4473
4474         index = (vtag >> 5) & 0x7F;
4475         bit = vtag & 0x1F;
4476         igb_shadow_vfta[index] |= (1 << bit);
4477         ++adapter->num_vlans;
4478         /* Re-init to load the changes */
4479         igb_init(adapter);
4480 }
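/*
** Worked example of the VFTA indexing above: the 4096 possible VLAN
** ids map onto 128 32-bit words, so for vtag 1234 (0x4d2):
**
**	index = (1234 >> 5) & 0x7F = 38
**	bit   =  1234 & 0x1F       = 18
**
** i.e. registering VLAN 1234 sets bit 18 of igb_shadow_vfta[38].
*/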
4481
4482 /*
4483  * This routine is run via a vlan
4484  * unconfig EVENT
4485  */
4486 static void
4487 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4488 {
4489         struct adapter  *adapter = ifp->if_softc;
4490         u32             index, bit;
4491
4492         if (ifp->if_softc !=  arg)
4493                 return;
4494
4495         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4496                 return;
4497
4498         index = (vtag >> 5) & 0x7F;
4499         bit = vtag & 0x1F;
4500         igb_shadow_vfta[index] &= ~(1 << bit);
4501         --adapter->num_vlans;
4502         /* Re-init to load the changes */
4503         igb_init(adapter);
4504 }
4505
4506 static void
4507 igb_setup_vlan_hw_support(struct adapter *adapter)
4508 {
4509         struct e1000_hw *hw = &adapter->hw;
4510         u32             reg;
4511
4512         /*
4513         ** We get here through init_locked, meaning
4514         ** a soft reset; that has already cleared
4515         ** the VFTA and other state, so if no
4516         ** vlans have been registered, do nothing.
4517         */
4518         if (adapter->num_vlans == 0)
4519                 return;
4520
4521         /*
4522         ** A soft reset zeroes out the VFTA, so
4523         ** we need to repopulate it now.
4524         */
4525         for (int i = 0; i < IGB_VFTA_SIZE; i++)
4526                 if (igb_shadow_vfta[i] != 0)
4527                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4528                             i, igb_shadow_vfta[i]);
4529
4530         reg = E1000_READ_REG(hw, E1000_CTRL);
4531         reg |= E1000_CTRL_VME;
4532         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4533
4534         /* Enable the Filter Table */
4535         reg = E1000_READ_REG(hw, E1000_RCTL);
4536         reg &= ~E1000_RCTL_CFIEN;
4537         reg |= E1000_RCTL_VFE;
4538         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4539
4540         /* Update the frame size */
4541         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4542             adapter->max_frame_size + VLAN_TAG_SIZE);
4543 }
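/*
** Quick sanity check of the RLPML write above: with a standard
** 1500-byte MTU, max_frame_size is 1500 + ETHER_HDR_LEN (14) +
** ETHER_CRC_LEN (4) = 1518, so the register is programmed to
** 1518 + VLAN_TAG_SIZE (4) = 1522, the 802.1Q maximum frame length.
*/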
4544
4545 static void
4546 igb_enable_intr(struct adapter *adapter)
4547 {
4548         /* With MSIX/RSS set up, program which interrupts to auto-clear */
4549         if (adapter->msix_mem) {
4550                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4551                     adapter->eims_mask);
4552                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4553                     adapter->eims_mask);
4554                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4555                     adapter->eims_mask);
4556                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4557                     E1000_IMS_LSC);
4558         } else {
4559                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4560                     IMS_ENABLE_MASK);
4561         }
4562         E1000_WRITE_FLUSH(&adapter->hw);
4563
4564         return;
4565 }
4566
4567 static void
4568 igb_disable_intr(struct adapter *adapter)
4569 {
4570         if (adapter->msix_mem) {
4571                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4572                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4573         } 
4574         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4575         E1000_WRITE_FLUSH(&adapter->hw);
4576         return;
4577 }
4578
4579 /*
4580  * Bit of a misnomer: what this really means is
4581  * to enable OS management of the system, i.e.
4582  * to disable the special hardware management features.
4583  */
4584 static void
4585 igb_init_manageability(struct adapter *adapter)
4586 {
4587         if (adapter->has_manage) {
4588                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4589                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4590
4591                 /* disable hardware interception of ARP */
4592                 manc &= ~(E1000_MANC_ARP_EN);
4593
4594                 /* enable receiving management packets to the host */
4595                 manc |= E1000_MANC_EN_MNG2HOST;
4596                 manc2h |= 1 << 5;  /* Mng Port 623 */
4597                 manc2h |= 1 << 6;  /* Mng Port 664 */
4598                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4599                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4600         }
4601 }
4602
4603 /*
4604  * Give control back to hardware management
4605  * controller if there is one.
4606  */
4607 static void
4608 igb_release_manageability(struct adapter *adapter)
4609 {
4610         if (adapter->has_manage) {
4611                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4612
4613                 /* re-enable hardware interception of ARP */
4614                 manc |= E1000_MANC_ARP_EN;
4615                 manc &= ~E1000_MANC_EN_MNG2HOST;
4616
4617                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4618         }
4619 }
4620
4621 /*
4622  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4623  * For ASF and Pass Through versions of f/w this means that
4624  * the driver is loaded. 
4625  *
4626  */
4627 static void
4628 igb_get_hw_control(struct adapter *adapter)
4629 {
4630         u32 ctrl_ext;
4631
4632         /* Let firmware know the driver has taken over */
4633         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4634         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4635             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4636 }
4637
4638 /*
4639  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4640  * For ASF and Pass Through versions of f/w this means that the
4641  * driver is no longer loaded.
4642  *
4643  */
4644 static void
4645 igb_release_hw_control(struct adapter *adapter)
4646 {
4647         u32 ctrl_ext;
4648
4649         /* Let firmware take over control of h/w */
4650         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4651         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4652             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4653 }
4654
4655 static int
4656 igb_is_valid_ether_addr(uint8_t *addr)
4657 {
4658         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4659
4660         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4661                 return (FALSE);
4662         }
4663
4664         return (TRUE);
4665 }
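/*
** Examples of the test above: an address is valid only if it is
** neither multicast/broadcast (low bit of the first octet set)
** nor all-zero:
**
**	00:1b:21:xx:xx:xx  -> TRUE   (unicast)
**	01:00:5e:00:00:01  -> FALSE  (addr[0] & 1, multicast)
**	00:00:00:00:00:00  -> FALSE  (matches zero_addr)
*/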
4666
4667
4668 /*
4669  * Enable PCI Wake On Lan capability
4670  */
4671 static void
4672 igb_enable_wakeup(device_t dev)
4673 {
4674         u16     cap, status;
4675         u8      id;
4676
4677         /* First find the capabilities pointer */
4678         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4679         /* Read the PM Capabilities */
4680         id = pci_read_config(dev, cap, 1);
4681         if (id != PCIY_PMG)     /* Something wrong */
4682                 return;
4683         /* OK, we have the power capabilities, so
4684            now get the status register */
4685         cap += PCIR_POWER_STATUS;
4686         status = pci_read_config(dev, cap, 2);
4687         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4688         pci_write_config(dev, cap, status, 2);
4689         return;
4690 }
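/*
** Note that the routine above assumes the PM capability is the
** first entry in the device's capability list. A more defensive
** variant would look it up explicitly (illustrative sketch using
** the 8.x pci_find_extcap() KPI, not part of this driver):
**
**	int pmc;
**
**	if (pci_find_extcap(dev, PCIY_PMG, &pmc) == 0) {
**		u16 st = pci_read_config(dev,
**		    pmc + PCIR_POWER_STATUS, 2);
**		st |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
**		pci_write_config(dev, pmc + PCIR_POWER_STATUS, st, 2);
**	}
*/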
4691
4692 static void
4693 igb_led_func(void *arg, int onoff)
4694 {
4695         struct adapter  *adapter = arg;
4696
4697         IGB_CORE_LOCK(adapter);
4698         if (onoff) {
4699                 e1000_setup_led(&adapter->hw);
4700                 e1000_led_on(&adapter->hw);
4701         } else {
4702                 e1000_led_off(&adapter->hw);
4703                 e1000_cleanup_led(&adapter->hw);
4704         }
4705         IGB_CORE_UNLOCK(adapter);
4706 }
4707
4708 /**********************************************************************
4709  *
4710  *  Update the board statistics counters.
4711  *
4712  **********************************************************************/
4713 static void
4714 igb_update_stats_counters(struct adapter *adapter)
4715 {
4716         struct ifnet   *ifp;
4717
4718         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4719            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4720                 adapter->stats.symerrs +=
4721                     E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4722                 adapter->stats.sec +=
4723                     E1000_READ_REG(&adapter->hw, E1000_SEC);
4724         }
4725         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4726         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4727         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4728         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4729
4730         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4731         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4732         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4733         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4734         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4735         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4736         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4737         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4738         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4739         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4740         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4741         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4742         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4743         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4744         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4745         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4746         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4747         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4748         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4749         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4750
4751         /* For the 64-bit byte counters the low dword must be read first. */
4752         /* Both registers clear on the read of the high dword */
4753
4754         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4755         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4756
4757         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4758         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4759         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4760         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4761         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4762
4763         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4764         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4765
4766         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4767         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4768         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4769         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4770         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4771         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4772         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4773         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4774         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4775         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4776
4777         adapter->stats.algnerrc += 
4778                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4779         adapter->stats.rxerrc += 
4780                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4781         adapter->stats.tncrs += 
4782                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4783         adapter->stats.cexterr += 
4784                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4785         adapter->stats.tsctc += 
4786                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4787         adapter->stats.tsctfc += 
4788                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4789         ifp = adapter->ifp;
4790
4791         ifp->if_collisions = adapter->stats.colc;
4792
4793         /* Rx Errors */
4794         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4795             adapter->stats.crcerrs + adapter->stats.algnerrc +
4796             adapter->stats.ruc + adapter->stats.roc +
4797             adapter->stats.mpc + adapter->stats.cexterr;
4798
4799         /* Tx Errors */
4800         ifp->if_oerrors = adapter->stats.ecol +
4801             adapter->stats.latecol + adapter->watchdog_events;
4802 }
4803
4804
4805 /**********************************************************************
4806  *
4807  *  This routine is called only when igb_display_debug_stats is enabled.
4808  *  This routine provides a way to take a look at important statistics
4809  *  maintained by the driver and hardware.
4810  *
4811  **********************************************************************/
4812 static void
4813 igb_print_debug_info(struct adapter *adapter)
4814 {
4815         device_t dev = adapter->dev;
4816         struct igb_queue *que = adapter->queues;
4817         struct rx_ring *rxr = adapter->rx_rings;
4818         struct tx_ring *txr = adapter->tx_rings;
4819         uint8_t *hw_addr = adapter->hw.hw_addr;
4820
4821         device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4822         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4823             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4824             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4825
4826 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4827         device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4828             E1000_READ_REG(&adapter->hw, E1000_IMS),
4829             E1000_READ_REG(&adapter->hw, E1000_EIMS));
4830 #endif
4831
4832         device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4833             ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4834             (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4835         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4836             adapter->hw.fc.high_water,
4837             adapter->hw.fc.low_water);
4838
4839         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4840                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4841                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4842                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4843                 device_printf(dev, "rdh = %d, rdt = %d\n",
4844                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4845                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4846                 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4847                     txr->me, (long long)txr->no_desc_avail);
4848                 device_printf(dev, "TX(%d) Packets sent = %lld\n",
4849                     txr->me, (long long)txr->tx_packets);
4850                 device_printf(dev, "RX(%d) Packets received = %lld  ",
4851                     rxr->me, (long long)rxr->rx_packets);
4852         }
4853
4854         rxr = adapter->rx_rings; /* the first loop advanced rxr, reset it */
             for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4855                 struct lro_ctrl *lro = &rxr->lro;
4856                 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4857                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4858                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4859                 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4860                     (long long)rxr->rx_packets);
4861                 device_printf(dev, " Split Packets = %lld ",
4862                     (long long)rxr->rx_split_packets);
4863                 device_printf(dev, " Byte count = %lld\n",
4864                     (long long)rxr->rx_bytes);
4865                 device_printf(dev,"RX(%d) LRO Queued= %d  ",
4866                     i, lro->lro_queued);
4867                 device_printf(dev,"LRO Flushed= %d\n",lro->lro_flushed);
4868         }
4869
4870         for (int i = 0; i < adapter->num_queues; i++, que++)
4871                 device_printf(dev, "QUE(%d) IRQs = %lld\n",
4872                     i, (long long)que->irqs);
4873
4874         device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4875         device_printf(dev, "Mbuf defrag failed = %ld\n",
4876             adapter->mbuf_defrag_failed);
4877         device_printf(dev, "Std mbuf header failed = %ld\n",
4878             adapter->mbuf_header_failed);
4879         device_printf(dev, "Std mbuf packet failed = %ld\n",
4880             adapter->mbuf_packet_failed);
4881         device_printf(dev, "Driver dropped packets = %ld\n",
4882             adapter->dropped_pkts);
4883         device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4884                 adapter->no_tx_dma_setup);
4885 }
4886
4887 static void
4888 igb_print_hw_stats(struct adapter *adapter)
4889 {
4890         device_t dev = adapter->dev;
4891
4892         device_printf(dev, "Excessive collisions = %lld\n",
4893             (long long)adapter->stats.ecol);
4894 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4895         device_printf(dev, "Symbol errors = %lld\n",
4896             (long long)adapter->stats.symerrs);
4897 #endif
4898         device_printf(dev, "Sequence errors = %lld\n",
4899             (long long)adapter->stats.sec);
4900         device_printf(dev, "Defer count = %lld\n",
4901             (long long)adapter->stats.dc);
4902         device_printf(dev, "Missed Packets = %lld\n",
4903             (long long)adapter->stats.mpc);
4904         device_printf(dev, "Receive No Buffers = %lld\n",
4905             (long long)adapter->stats.rnbc);
4906         /* RLEC is inaccurate on some hardware, calculate our own. */
4907         device_printf(dev, "Receive Length Errors = %lld\n",
4908             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4909         device_printf(dev, "Receive errors = %lld\n",
4910             (long long)adapter->stats.rxerrc);
4911         device_printf(dev, "Crc errors = %lld\n",
4912             (long long)adapter->stats.crcerrs);
4913         device_printf(dev, "Alignment errors = %lld\n",
4914             (long long)adapter->stats.algnerrc);
4915         /* On 82575 these are collision counts */
4916         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4917             (long long)adapter->stats.cexterr);
4918         device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4919         device_printf(dev, "watchdog timeouts = %ld\n",
4920             adapter->watchdog_events);
4921         device_printf(dev, "XON Rcvd = %lld\n",
4922             (long long)adapter->stats.xonrxc);
4923         device_printf(dev, "XON Xmtd = %lld\n",
4924             (long long)adapter->stats.xontxc);
4925         device_printf(dev, "XOFF Rcvd = %lld\n",
4926             (long long)adapter->stats.xoffrxc);
4927         device_printf(dev, "XOFF Xmtd = %lld\n",
4928             (long long)adapter->stats.xofftxc);
4929         device_printf(dev, "Good Packets Rcvd = %lld\n",
4930             (long long)adapter->stats.gprc);
4931         device_printf(dev, "Good Packets Xmtd = %lld\n",
4932             (long long)adapter->stats.gptc);
4933         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4934             (long long)adapter->stats.tsctc);
4935         device_printf(dev, "TSO Contexts Failed = %lld\n",
4936             (long long)adapter->stats.tsctfc);
4937 }
4938
4939 /**********************************************************************
4940  *
4941  *  This routine provides a way to dump out the adapter eeprom,
4942  *  often a useful debug/service tool. This only dumps the first
4943  *  32 words, stuff that matters is in that extent.
4944  *  32 words, which covers everything that matters.
4945  **********************************************************************/
4946 static void
4947 igb_print_nvm_info(struct adapter *adapter)
4948 {
4949         u16     eeprom_data;
4950         int     i, j, row = 0;
4951
4952         /* It's a bit crude, but it gets the job done */
4953         printf("\nInterface EEPROM Dump:\n");
4954         printf("Offset\n0x0000  ");
4955         for (i = 0, j = 0; i < 32; i++, j++) {
4956                 if (j == 8) { /* Make the offset block */
4957                         j = 0; ++row;
4958                         printf("\n0x00%x0  ", row);
4959                 }
4960                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4961                 printf("%04x ", eeprom_data);
4962         }
4963         printf("\n");
4964 }
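/*
** Shape of the dump produced above (word values are placeholders):
** four rows of eight 16-bit words,
**
**	Interface EEPROM Dump:
**	Offset
**	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
**	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
**	...
*/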
4965
4966 static int
4967 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4968 {
4969         struct adapter *adapter;
4970         int error;
4971         int result;
4972
4973         result = -1;
4974         error = sysctl_handle_int(oidp, &result, 0, req);
4975
4976         if (error || !req->newptr)
4977                 return (error);
4978
4979         if (result == 1) {
4980                 adapter = (struct adapter *)arg1;
4981                 igb_print_debug_info(adapter);
4982         }
4983         /*
4984          * This value will cause a hex dump of the
4985          * first 32 16-bit words of the EEPROM to
4986          * the screen.
4987          */
4988         if (result == 2) {
4989                 adapter = (struct adapter *)arg1;
4990                 igb_print_nvm_info(adapter);
4991         }
4992
4993         return (error);
4994 }
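/*
** Typical use from userland (a sketch; the oid name is whatever
** was registered at attach time, commonly dev.igb.<unit>.debug_info):
**
**	# sysctl dev.igb.0.debug_info=1		(queue/driver state)
**	# sysctl dev.igb.0.debug_info=2		(EEPROM word dump)
*/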
4995
4996
4997 static int
4998 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4999 {
5000         struct adapter *adapter;
5001         int error;
5002         int result;
5003
5004         result = -1;
5005         error = sysctl_handle_int(oidp, &result, 0, req);
5006
5007         if (error || !req->newptr)
5008                 return (error);
5009
5010         if (result == 1) {
5011                 adapter = (struct adapter *)arg1;
5012                 igb_print_hw_stats(adapter);
5013         }
5014
5015         return (error);
5016 }
5017
5018 static void
5019 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5020         const char *description, int *limit, int value)
5021 {
5022         *limit = value;
5023         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5024             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5025             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5026 }
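/*
** Sketch of how this helper is used at attach time (the exact oid
** name and default value live elsewhere in this file):
**
**	igb_add_rx_process_limit(adapter, "rx_processing_limit",
**	    "max number of rx packets to process",
**	    &adapter->rx_process_limit, 100);
**
** This both seeds adapter->rx_process_limit and exposes it as a
** read/write sysctl under the device's tree.
*/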