/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.9.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *);
#if __FreeBSD_version >= 800000
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     igb_qflush(struct ifnet *);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static void     igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_print_hw_stats(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void     igb_print_debug_info(struct adapter *);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static int      igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);
static void     igb_handle_rxtx(void *context, int pending);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        {0, 0}
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
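
/*
 * Built as a module this driver is if_igb.ko: it can be loaded at
 * boot with if_igb_load="YES" in /boot/loader.conf, or at runtime
 * with "kldload if_igb".
 */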

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
 * Header split has seemed to be beneficial in
 * many circumstances tested, however there have
 * been some stability issues, so the default is
 * off.
 */
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
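
/*
 * All of the above tunables can be set at boot in /boot/loader.conf;
 * the values below are illustrative examples only:
 *
 *      hw.igb.rxd=2048
 *      hw.igb.txd=2048
 *      hw.igb.enable_msix=1
 *      hw.igb.num_queues=2
 */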

/*
** Shadow VFTA table; this is needed because
** the real filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];


/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_stats, "I", "Statistics");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
            &igb_fc_setting, 0, "Flow Control");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
            &igb_enable_aim, 1, "Interrupt Moderation");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueue */
        igb_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.
         * It must not exceed the hardware maximum, and must be a
         * multiple of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;
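
        /*
         * Example (assuming 16-byte descriptors and an IGB_DBA_ALIGN of
         * 128, i.e. ring sizes in multiples of 8 descriptors): a value
         * such as hw.igb.txd=1020 is not a multiple of 8, so the checks
         * above would fall back to IGB_DEFAULT_TXD.
         */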

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        igb_setup_interface(dev, adapter);

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "VLAN in use, detach first\n");
                return (EBUSY);
        }

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);

        callout_drain(&adapter->timer);

        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING))
                igb_start(ifp);

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->watchdog_check = TRUE;
        }
}

/*
 * Legacy TX driver routine, called from the stack;
 * it always uses txr[0] and spins for its lock.
 * It should not be used with multiqueue tx.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr;
        int             i = 0, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        if (IGB_TX_TRYLOCK(txr)) {
                err = igb_mq_start_locked(ifp, txr, m);
                IGB_TX_UNLOCK(txr);
        } else
                err = drbr_enqueue(ifp, txr->br, m);

        return (err);
}
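
/*
** Example of the queue selection above: with adapter->num_queues = 4,
** a packet carrying RSS flowid 10 is steered to tx_rings[10 % 4],
** i.e. queue 2; packets without M_FLOWID always use queue 0.
*/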

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->watchdog_check = TRUE;
                txr->watchdog_time = ticks;
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting the hardware takes a very long
                         * time and results in link renegotiation, we only
                         * initialize the hardware when it is absolutely
                         * required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                IGB_CORE_LOCK(adapter);
                                igb_init_locked(adapter);
                                IGB_CORE_UNLOCK(adapter);
                        }
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
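                /* FALLTHROUGH */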
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack
 *  as the init entry point in the network interface structure.
 *  It is also used by the driver as a hw/sw initialization routine
 *  to get to a consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest MAC address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        igb_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (ifp->if_mtu > ETHERMTU)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MCLBYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* Set up VLAN tag offload and filter */
        igb_setup_vlan_hw_support(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Don't reset the phy next time init gets called */
        adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_rxtx(void *context, int pending)
{
        struct igb_queue        *que = context;
        struct adapter          *adapter = que->adapter;
        struct tx_ring          *txr = adapter->tx_rings;
        struct ifnet            *ifp;

        ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                if (igb_rxeof(que, adapter->rx_process_limit))
                        taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
                IGB_TX_LOCK(txr);
                igb_txeof(txr);

#if __FreeBSD_version >= 800000
                if (!drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr, NULL);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
        }

        igb_enable_intr(adapter);
}

static void
igb_handle_que(void *context, int pending)
{
        struct igb_queue *que = context;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct ifnet    *ifp = adapter->ifp;
        bool            more;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                more = igb_rxeof(que, -1);

                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#if __FreeBSD_version >= 800000
                igb_mq_start_locked(ifp, txr, NULL);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
                if (more) {
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
        }

        /* Reenable this interrupt */
#ifdef DEVICE_POLLING
        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
        E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
        struct adapter *adapter = context;

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
        struct adapter  *adapter = arg;
        uint32_t        reg_icr;


        reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

        /* Hot eject?  */
        if (reg_icr == 0xffffffff)
                return FILTER_STRAY;

        /* Definitely not our interrupt.  */
        if (reg_icr == 0x0)
                return FILTER_STRAY;

        if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
                return FILTER_STRAY;

        /*
         * Mask interrupts until the taskqueue is finished running.  This is
         * cheap, just assume that it is needed.  This also works around the
         * MSI message reordering errata on certain systems.
         */
        igb_disable_intr(adapter);
        taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

        /* Link status change */
        if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
                taskqueue_enqueue(adapter->tq, &adapter->link_task);

        if (reg_icr & E1000_ICR_RXO)
                adapter->rx_overruns++;
        return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: if you use this code you MUST be sure
 *  that multiqueue is not in use, i.e., set igb_num_queues to 1.
 *
 *********************************************************************/
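/*
 * Polling requires "options DEVICE_POLLING" in the kernel config and
 * is switched on per interface from userland, e.g. "ifconfig igb0
 * polling", which toggles IFCAP_POLLING via the SIOCSIFCAP case above.
 */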
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que = adapter->queues;
        struct tx_ring          *txr = adapter->tx_rings;
        u32                     reg_icr, rx_done = 0;
        u32                     loop = IGB_MAX_LOOP;
        bool                    more;

        IGB_CORE_LOCK(adapter);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                IGB_CORE_UNLOCK(adapter);
                return POLL_RETURN_COUNT(rx_done);
        }

        if (cmd == POLL_AND_CHECK_STATUS) {
                reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
                /* Link status change */
                if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
                        taskqueue_enqueue(adapter->tq, &adapter->link_task);

                if (reg_icr & E1000_ICR_RXO)
                        adapter->rx_overruns++;
        }
        IGB_CORE_UNLOCK(adapter);

        /* TODO: rx_count */
        rx_done = igb_rxeof(que, count) ? 1 : 0;

        IGB_TX_LOCK(txr);
        do {
                more = igb_txeof(txr);
        } while (loop-- && more);
#if __FreeBSD_version >= 800000
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr, NULL);
#else
        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                igb_start_locked(txr, ifp);
#endif
        IGB_TX_UNLOCK(txr);
        return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Que Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
        struct igb_queue *que = arg;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct rx_ring *rxr = que->rxr;
        u32             newitr = 0;
        bool            more_tx, more_rx;

        E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
        ++que->irqs;

        IGB_TX_LOCK(txr);
        more_tx = igb_txeof(txr);
        IGB_TX_UNLOCK(txr);

        more_rx = igb_rxeof(que, adapter->rx_process_limit);

        if (igb_enable_aim == FALSE)
                goto no_calc;
        /*
        ** Do Adaptive Interrupt Moderation:
        **  - Write out last calculated setting
        **  - Calculate based on average size over
        **    the last interval.
        */
        if (que->eitr_setting)
                E1000_WRITE_REG(&adapter->hw,
                    E1000_EITR(que->msix), que->eitr_setting);

        que->eitr_setting = 0;

        /* Idle, do nothing */
        if ((txr->bytes == 0) && (rxr->bytes == 0))
                goto no_calc;

        /* Use half the default if sub-gig */
        if (adapter->link_speed != 1000)
                newitr = IGB_DEFAULT_ITR / 2;
        else {
                if ((txr->bytes) && (txr->packets))
                        newitr = txr->bytes/txr->packets;
                if ((rxr->bytes) && (rxr->packets))
                        newitr = max(newitr,
                            (rxr->bytes / rxr->packets));
                newitr += 24; /* account for hardware frame, crc */
                /* set an upper boundary */
                newitr = min(newitr, 3000);
                /* Be nice to the mid range */
                if ((newitr > 300) && (newitr < 1200))
                        newitr = (newitr / 3);
                else
                        newitr = (newitr / 2);
        }
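        /*
         * Worked example (assumed numbers): at 1Gb with an average
         * frame of 1500 bytes, newitr = 1500 + 24 = 1524; that is
         * above the mid range, so it is halved to 762, which the
         * mask below rounds down to 760.
         */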
1462         newitr &= 0x7FFC;  /* Mask invalid bits */
1463         if (adapter->hw.mac.type == e1000_82575)
1464                 newitr |= newitr << 16;
1465         else
1466                 newitr |= E1000_EITR_CNT_IGNR;
1467                  
1468         /* save for next interrupt */
1469         que->eitr_setting = newitr;
1470
1471         /* Reset state */
1472         txr->bytes = 0;
1473         txr->packets = 0;
1474         rxr->bytes = 0;
1475         rxr->packets = 0;
1476
1477 no_calc:
1478         /* Schedule a clean task if needed */
1479         if (more_tx || more_rx) 
1480                 taskqueue_enqueue(que->tq, &que->que_task);
1481         else
1482                 /* Reenable this interrupt */
1483                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1484         return;
1485 }
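
/*
** Illustrative sketch only (kept out of the build): the adaptive
** interrupt moderation (AIM) value computed in igb_msix_que() above,
** restated as a pure function for clarity. The function name is
** hypothetical; the constants and masking mirror the gigabit-link
** path of the code above.
*/
#if 0
static u32
igb_example_aim(u32 tx_bytes, u32 tx_packets, u32 rx_bytes, u32 rx_packets)
{
        u32 newitr = 0;

        /* Average frame size on each ring; take the larger */
        if (tx_bytes && tx_packets)
                newitr = tx_bytes / tx_packets;
        if (rx_bytes && rx_packets)
                newitr = max(newitr, rx_bytes / rx_packets);
        newitr += 24;                   /* hardware framing + CRC */
        newitr = min(newitr, 3000);     /* upper boundary */
        /* Be nice to the mid range */
        if ((newitr > 300) && (newitr < 1200))
                newitr = newitr / 3;
        else
                newitr = newitr / 2;
        return (newitr & 0x7FFC);       /* mask invalid bits */
}
#endif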
1486
1487
1488 /*********************************************************************
1489  *
1490  *  MSIX Link Interrupt Service routine
1491  *
1492  **********************************************************************/
1493
1494 static void
1495 igb_msix_link(void *arg)
1496 {
1497         struct adapter  *adapter = arg;
1498         u32             icr;
1499
1500         ++adapter->link_irq;
1501         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1502         if (!(icr & E1000_ICR_LSC))
1503                 goto spurious;
1504         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1505
1506 spurious:
1507         /* Rearm */
1508         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1509         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1510         return;
1511 }
1512
1513
1514 /*********************************************************************
1515  *
1516  *  Media Ioctl callback
1517  *
1518  *  This routine is called whenever the user queries the status of
1519  *  the interface using ifconfig.
1520  *
1521  **********************************************************************/
1522 static void
1523 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1524 {
1525         struct adapter *adapter = ifp->if_softc;
1526         u_char fiber_type = IFM_1000_SX;
1527
1528         INIT_DEBUGOUT("igb_media_status: begin");
1529
1530         IGB_CORE_LOCK(adapter);
1531         igb_update_link_status(adapter);
1532
1533         ifmr->ifm_status = IFM_AVALID;
1534         ifmr->ifm_active = IFM_ETHER;
1535
1536         if (!adapter->link_active) {
1537                 IGB_CORE_UNLOCK(adapter);
1538                 return;
1539         }
1540
1541         ifmr->ifm_status |= IFM_ACTIVE;
1542
1543         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1544             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1545                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1546         else {
1547                 switch (adapter->link_speed) {
1548                 case 10:
1549                         ifmr->ifm_active |= IFM_10_T;
1550                         break;
1551                 case 100:
1552                         ifmr->ifm_active |= IFM_100_TX;
1553                         break;
1554                 case 1000:
1555                         ifmr->ifm_active |= IFM_1000_T;
1556                         break;
1557                 }
1558                 if (adapter->link_duplex == FULL_DUPLEX)
1559                         ifmr->ifm_active |= IFM_FDX;
1560                 else
1561                         ifmr->ifm_active |= IFM_HDX;
1562         }
1563         IGB_CORE_UNLOCK(adapter);
1564 }
1565
1566 /*********************************************************************
1567  *
1568  *  Media Ioctl callback
1569  *
1570  *  This routine is called when the user changes speed/duplex using
1571  *  the media/mediaopt options with ifconfig.
1572  *
1573  **********************************************************************/
1574 static int
1575 igb_media_change(struct ifnet *ifp)
1576 {
1577         struct adapter *adapter = ifp->if_softc;
1578         struct ifmedia  *ifm = &adapter->media;
1579
1580         INIT_DEBUGOUT("igb_media_change: begin");
1581
1582         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1583                 return (EINVAL);
1584
1585         IGB_CORE_LOCK(adapter);
1586         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1587         case IFM_AUTO:
1588                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1589                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1590                 break;
1591         case IFM_1000_LX:
1592         case IFM_1000_SX:
1593         case IFM_1000_T:
1594                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1595                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1596                 break;
1597         case IFM_100_TX:
1598                 adapter->hw.mac.autoneg = FALSE;
1599                 adapter->hw.phy.autoneg_advertised = 0;
1600                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1601                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1602                 else
1603                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1604                 break;
1605         case IFM_10_T:
1606                 adapter->hw.mac.autoneg = FALSE;
1607                 adapter->hw.phy.autoneg_advertised = 0;
1608                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1609                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1610                 else
1611                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1612                 break;
1613         default:
1614                 device_printf(adapter->dev, "Unsupported media type\n");
1615         }
1616
1617         /* As the speed/duplex settings may have changed, we need to
1618          * reset the PHY.
1619          */
1620         adapter->hw.phy.reset_disable = FALSE;
1621
1622         igb_init_locked(adapter);
1623         IGB_CORE_UNLOCK(adapter);
1624
1625         return (0);
1626 }
1627
1628
1629 /*********************************************************************
1630  *
1631  *  This routine maps the mbufs to the Advanced TX descriptors
1632  *  used by the 82575-class adapters.
1633  *  
1634  **********************************************************************/
1635
1636 static int
1637 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1638 {
1639         struct adapter          *adapter = txr->adapter;
1640         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1641         bus_dmamap_t            map;
1642         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1643         union e1000_adv_tx_desc *txd = NULL;
1644         struct mbuf             *m_head;
1645         u32                     olinfo_status = 0, cmd_type_len = 0;
1646         int                     nsegs, i, j, error, first, last = 0;
1647         u32                     hdrlen = 0;
1648
1649         m_head = *m_headp;
1650
1651
1652         /* Set basic descriptor constants */
1653         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1654         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1655         if (m_head->m_flags & M_VLANTAG)
1656                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1657
1658         /*
1659          * Force a cleanup if number of TX descriptors
1660          * available hits the threshold
1661          */
1662         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1663                 igb_txeof(txr);
1664                 /* Do we now have at least the minimum? */
1665                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1666                         txr->no_desc_avail++;
1667                         return (ENOBUFS);
1668                 }
1669         }
1670
1671         /*
1672          * Map the packet for DMA.
1673          *
1674          * Capture the first descriptor index,
1675          * this descriptor will have the index
1676          * of the EOP which is the only one that
1677          * now gets a DONE bit writeback.
1678          */
1679         first = txr->next_avail_desc;
1680         tx_buffer = &txr->tx_buffers[first];
1681         tx_buffer_mapped = tx_buffer;
1682         map = tx_buffer->map;
1683
1684         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1685             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1686
1687         if (error == EFBIG) {
1688                 struct mbuf *m;
1689
1690                 m = m_defrag(*m_headp, M_DONTWAIT);
1691                 if (m == NULL) {
1692                         adapter->mbuf_defrag_failed++;
1693                         m_freem(*m_headp);
1694                         *m_headp = NULL;
1695                         return (ENOBUFS);
1696                 }
1697                 *m_headp = m;
1698
1699                 /* Try it again */
1700                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1701                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1702
1703                 if (error == ENOMEM) {
1704                         adapter->no_tx_dma_setup++;
1705                         return (error);
1706                 } else if (error != 0) {
1707                         adapter->no_tx_dma_setup++;
1708                         m_freem(*m_headp);
1709                         *m_headp = NULL;
1710                         return (error);
1711                 }
1712         } else if (error == ENOMEM) {
1713                 adapter->no_tx_dma_setup++;
1714                 return (error);
1715         } else if (error != 0) {
1716                 adapter->no_tx_dma_setup++;
1717                 m_freem(*m_headp);
1718                 *m_headp = NULL;
1719                 return (error);
1720         }
1721
1722         /* Check again to be sure we have enough descriptors */
1723         if (nsegs > (txr->tx_avail - 2)) {
1724                 txr->no_desc_avail++;
1725                 bus_dmamap_unload(txr->txtag, map);
1726                 return (ENOBUFS);
1727         }
1728         m_head = *m_headp;
1729
1730         /*
1731          * Set up the context descriptor:
1732          * used when any hardware offload is done.
1733          * This includes CSUM, VLAN, and TSO. It
1734          * will use the first descriptor.
1735          */
1736         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1737                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1738                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1739                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1740                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1741                 } else
1742                         return (ENXIO); 
1743         } else if (igb_tx_ctx_setup(txr, m_head))
1744                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1745
1746         /* Calculate payload length */
1747         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1748             << E1000_ADVTXD_PAYLEN_SHIFT);
1749
1750         /* 82575 needs the queue index added */
1751         if (adapter->hw.mac.type == e1000_82575)
1752                 olinfo_status |= txr->me << 4;
1753
1754         /* Set up our transmit descriptors */
1755         i = txr->next_avail_desc;
1756         for (j = 0; j < nsegs; j++) {
1757                 bus_size_t seg_len;
1758                 bus_addr_t seg_addr;
1759
1760                 tx_buffer = &txr->tx_buffers[i];
1761                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1762                 seg_addr = segs[j].ds_addr;
1763                 seg_len  = segs[j].ds_len;
1764
1765                 txd->read.buffer_addr = htole64(seg_addr);
1766                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1767                 txd->read.olinfo_status = htole32(olinfo_status);
1768                 last = i;
1769                 if (++i == adapter->num_tx_desc)
1770                         i = 0;
1771                 tx_buffer->m_head = NULL;
1772                 tx_buffer->next_eop = -1;
1773         }
1774
1775         txr->next_avail_desc = i;
1776         txr->tx_avail -= nsegs;
1777
1778         tx_buffer->m_head = m_head;
1779         tx_buffer_mapped->map = tx_buffer->map;
1780         tx_buffer->map = map;
1781         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1782
1783         /*
1784          * Last Descriptor of Packet
1785          * needs End Of Packet (EOP)
1786          * and Report Status (RS)
1787          */
1788         txd->read.cmd_type_len |=
1789             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1790         /*
1791          * Keep track in the first buffer which
1792          * descriptor will be written back
1793          */
1794         tx_buffer = &txr->tx_buffers[first];
1795         tx_buffer->next_eop = last;
1796         txr->watchdog_time = ticks;
1797
1798         /*
1799          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1800          * that this frame is available to transmit.
1801          */
1802         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1803             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1804         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1805         ++txr->tx_packets;
1806
1807         return (0);
1808
1809 }
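
/*
** Illustrative sketch only (kept out of the build): the circular walk
** igb_xmit() makes over the descriptor ring. The index wraps at the
** ring size and the last slot written is remembered so that only the
** EOP descriptor gets the EOP/RS bits. Names are hypothetical and
** nsegs is assumed to be at least 1.
*/
#if 0
static int
igb_example_fill_ring(int next_avail, int num_desc, int nsegs, int *eop)
{
        int i = next_avail;

        for (int j = 0; j < nsegs; j++) {
                /* ... write segs[j] into descriptor slot i ... */
                *eop = i;               /* last slot gets EOP/RS */
                if (++i == num_desc)    /* wrap at end of ring */
                        i = 0;
        }
        return (i);                     /* new next_avail_desc */
}
#endif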
1810
1811 static void
1812 igb_set_promisc(struct adapter *adapter)
1813 {
1814         struct ifnet    *ifp = adapter->ifp;
1815         uint32_t        reg_rctl;
1816
1817         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1818
1819         if (ifp->if_flags & IFF_PROMISC) {
1820                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1821                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1822         } else if (ifp->if_flags & IFF_ALLMULTI) {
1823                 reg_rctl |= E1000_RCTL_MPE;
1824                 reg_rctl &= ~E1000_RCTL_UPE;
1825                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1826         }
1827 }
1828
1829 static void
1830 igb_disable_promisc(struct adapter *adapter)
1831 {
1832         uint32_t        reg_rctl;
1833
1834         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1835
1836         reg_rctl &=  (~E1000_RCTL_UPE);
1837         reg_rctl &=  (~E1000_RCTL_MPE);
1838         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1839 }
1840
1841
1842 /*********************************************************************
1843  *  Multicast Update
1844  *
1845  *  This routine is called whenever multicast address list is updated.
1846  *
1847  **********************************************************************/
1848
1849 static void
1850 igb_set_multi(struct adapter *adapter)
1851 {
1852         struct ifnet    *ifp = adapter->ifp;
1853         struct ifmultiaddr *ifma;
1854         u32 reg_rctl = 0;
1855         u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1856
1857         int mcnt = 0;
1858
1859         IOCTL_DEBUGOUT("igb_set_multi: begin");
1860
1861 #if __FreeBSD_version < 800000
1862         IF_ADDR_LOCK(ifp);
1863 #else
1864         if_maddr_rlock(ifp);
1865 #endif
1866         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1867                 if (ifma->ifma_addr->sa_family != AF_LINK)
1868                         continue;
1869
1870                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1871                         break;
1872
1873                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1874                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1875                 mcnt++;
1876         }
1877 #if __FreeBSD_version < 800000
1878         IF_ADDR_UNLOCK(ifp);
1879 #else
1880         if_maddr_runlock(ifp);
1881 #endif
1882
1883         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1884                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1885                 reg_rctl |= E1000_RCTL_MPE;
1886                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1887         } else
1888                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1889 }
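
/*
** Illustrative sketch only (kept out of the build): how igb_set_multi()
** flattens link-level addresses into the flat mta[] table consumed by
** e1000_update_mc_addr_list(). The helper name is hypothetical.
*/
#if 0
static int
igb_example_mta_add(u8 *mta, int mcnt, const u8 *lladdr)
{
        if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
                return (mcnt);          /* table full, caller sets MPE */
        bcopy(lladdr, &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
        return (mcnt + 1);
}
#endif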
1890
1891
1892 /*********************************************************************
1893  *  Timer routine:
1894  *      This routine checks for link status,
1895  *      updates statistics, and does the watchdog.
1896  *
1897  **********************************************************************/
1898
1899 static void
1900 igb_local_timer(void *arg)
1901 {
1902         struct adapter          *adapter = arg;
1903         struct ifnet            *ifp = adapter->ifp;
1904         device_t                dev = adapter->dev;
1905         struct tx_ring          *txr = adapter->tx_rings;
1906
1907
1908         IGB_CORE_LOCK_ASSERT(adapter);
1909
1910         igb_update_link_status(adapter);
1911         igb_update_stats_counters(adapter);
1912
1913         if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1914                 igb_print_hw_stats(adapter);
1915
1916         /*
1917         ** Watchdog: check for time since any descriptor was cleaned
1918         */
1919         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1920                 if (txr->watchdog_check == FALSE)
1921                         continue;
1922                 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1923                         goto timeout;
1924         }
1925
1926         /* Trigger an RX interrupt on all queues */
1927 #ifdef DEVICE_POLLING
1928         if (!(ifp->if_capenable & IFCAP_POLLING))
1929 #endif
1930         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1931         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1932         return;
1933
1934 timeout:
1935         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1936         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1937             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1938             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1939         device_printf(dev, "TX(%d) desc avail = %d, "
1940             "Next TX to Clean = %d\n",
1941             txr->me, txr->tx_avail, txr->next_to_clean);
1942         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1943         adapter->watchdog_events++;
1944         igb_init_locked(adapter);
1945 }
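
/*
** Illustrative sketch only (kept out of the build): the watchdog test
** in igb_local_timer() above. Because the delta is computed before the
** comparison, the test stays correct across a wrap of the kernel tick
** counter. The helper name is hypothetical.
*/
#if 0
static int
igb_example_timed_out(int now, int then, int limit)
{
        return ((now - then) > limit);
}
#endif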
1946
1947 static void
1948 igb_update_link_status(struct adapter *adapter)
1949 {
1950         struct e1000_hw *hw = &adapter->hw;
1951         struct ifnet *ifp = adapter->ifp;
1952         device_t dev = adapter->dev;
1953         struct tx_ring *txr = adapter->tx_rings;
1954         u32 link_check = 0;
1955
1956         /* Get the cached link value or read for real */
1957         switch (hw->phy.media_type) {
1958         case e1000_media_type_copper:
1959                 if (hw->mac.get_link_status) {
1960                         /* Do the work to read phy */
1961                         e1000_check_for_link(hw);
1962                         link_check = !hw->mac.get_link_status;
1963                 } else
1964                         link_check = TRUE;
1965                 break;
1966         case e1000_media_type_fiber:
1967                 e1000_check_for_link(hw);
1968                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1969                                  E1000_STATUS_LU);
1970                 break;
1971         case e1000_media_type_internal_serdes:
1972                 e1000_check_for_link(hw);
1973                 link_check = adapter->hw.mac.serdes_has_link;
1974                 break;
1975         default:
1976         case e1000_media_type_unknown:
1977                 break;
1978         }
1979
1980         /* Now we check if a transition has happened */
1981         if (link_check && (adapter->link_active == 0)) {
1982                 e1000_get_speed_and_duplex(&adapter->hw, 
1983                     &adapter->link_speed, &adapter->link_duplex);
1984                 if (bootverbose)
1985                         device_printf(dev, "Link is up %d Mbps %s\n",
1986                             adapter->link_speed,
1987                             ((adapter->link_duplex == FULL_DUPLEX) ?
1988                             "Full Duplex" : "Half Duplex"));
1989                 adapter->link_active = 1;
1990                 ifp->if_baudrate = adapter->link_speed * 1000000;
1991                 /* This can sleep */
1992                 if_link_state_change(ifp, LINK_STATE_UP);
1993         } else if (!link_check && (adapter->link_active == 1)) {
1994                 ifp->if_baudrate = adapter->link_speed = 0;
1995                 adapter->link_duplex = 0;
1996                 if (bootverbose)
1997                         device_printf(dev, "Link is Down\n");
1998                 adapter->link_active = 0;
1999                 /* This can sleep */
2000                 if_link_state_change(ifp, LINK_STATE_DOWN);
2001                 /* Turn off watchdogs */
2002                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2003                         txr->watchdog_check = FALSE;
2004         }
2005 }
2006
2007 /*********************************************************************
2008  *
2009  *  This routine disables all traffic on the adapter by issuing a
2010  *  global reset on the MAC and deallocates TX/RX buffers.
2011  *
2012  **********************************************************************/
2013
2014 static void
2015 igb_stop(void *arg)
2016 {
2017         struct adapter  *adapter = arg;
2018         struct ifnet    *ifp = adapter->ifp;
2019         struct tx_ring *txr = adapter->tx_rings;
2020
2021         IGB_CORE_LOCK_ASSERT(adapter);
2022
2023         INIT_DEBUGOUT("igb_stop: begin");
2024
2025         igb_disable_intr(adapter);
2026
2027         callout_stop(&adapter->timer);
2028
2029         /* Tell the stack that the interface is no longer active */
2030         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2031
2032         /* Unarm watchdog timer. */
2033         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2034                 IGB_TX_LOCK(txr);
2035                 txr->watchdog_check = FALSE;
2036                 IGB_TX_UNLOCK(txr);
2037         }
2038
2039         e1000_reset_hw(&adapter->hw);
2040         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2041
2042         e1000_led_off(&adapter->hw);
2043         e1000_cleanup_led(&adapter->hw);
2044 }
2045
2046
2047 /*********************************************************************
2048  *
2049  *  Determine hardware revision.
2050  *
2051  **********************************************************************/
2052 static void
2053 igb_identify_hardware(struct adapter *adapter)
2054 {
2055         device_t dev = adapter->dev;
2056
2057         /* Make sure our PCI config space has the necessary stuff set */
2058         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2059         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2060             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2061                 device_printf(dev, "Memory Access and/or Bus Master bits "
2062                     "were not set!\n");
2063                 adapter->hw.bus.pci_cmd_word |=
2064                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2065                 pci_write_config(dev, PCIR_COMMAND,
2066                     adapter->hw.bus.pci_cmd_word, 2);
2067         }
2068
2069         /* Save off the information about this board */
2070         adapter->hw.vendor_id = pci_get_vendor(dev);
2071         adapter->hw.device_id = pci_get_device(dev);
2072         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2073         adapter->hw.subsystem_vendor_id =
2074             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2075         adapter->hw.subsystem_device_id =
2076             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2077
2078         /* Do Shared Code Init and Setup */
2079         if (e1000_set_mac_type(&adapter->hw)) {
2080                 device_printf(dev, "Setup init failure\n");
2081                 return;
2082         }
2083 }
2084
2085 static int
2086 igb_allocate_pci_resources(struct adapter *adapter)
2087 {
2088         device_t        dev = adapter->dev;
2089         int             rid;
2090
2091         rid = PCIR_BAR(0);
2092         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2093             &rid, RF_ACTIVE);
2094         if (adapter->pci_mem == NULL) {
2095                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2096                 return (ENXIO);
2097         }
2098         adapter->osdep.mem_bus_space_tag =
2099             rman_get_bustag(adapter->pci_mem);
2100         adapter->osdep.mem_bus_space_handle =
2101             rman_get_bushandle(adapter->pci_mem);
2102         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2103
2104         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2105
2106         /* This will setup either MSI/X or MSI */
2107         adapter->msix = igb_setup_msix(adapter);
2108         adapter->hw.back = &adapter->osdep;
2109
2110         return (0);
2111 }
2112
2113 /*********************************************************************
2114  *
2115  *  Setup the Legacy or MSI Interrupt handler
2116  *
2117  **********************************************************************/
2118 static int
2119 igb_allocate_legacy(struct adapter *adapter)
2120 {
2121         device_t                dev = adapter->dev;
2122         struct igb_queue        *que = adapter->queues;
2123         int                     error, rid = 0;
2124
2125         /* Turn off all interrupts */
2126         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2127
2128         /* MSI RID is 1 */
2129         if (adapter->msix == 1)
2130                 rid = 1;
2131
2132         /* We allocate a single interrupt resource */
2133         adapter->res = bus_alloc_resource_any(dev,
2134             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2135         if (adapter->res == NULL) {
2136                 device_printf(dev, "Unable to allocate bus resource: "
2137                     "interrupt\n");
2138                 return (ENXIO);
2139         }
2140
2141         /*
2142          * Try allocating a fast interrupt and the associated deferred
2143          * processing contexts.
2144          */
2145         TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, que);
2146         /* Make tasklet for deferred link handling */
2147         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2148         adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2149             taskqueue_thread_enqueue, &adapter->tq);
2150         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2151             device_get_nameunit(adapter->dev));
2152         if ((error = bus_setup_intr(dev, adapter->res,
2153             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2154             adapter, &adapter->tag)) != 0) {
2155                 device_printf(dev, "Failed to register fast interrupt "
2156                             "handler: %d\n", error);
2157                 taskqueue_free(adapter->tq);
2158                 adapter->tq = NULL;
2159                 return (error);
2160         }
2161
2162         return (0);
2163 }
2164
2165
2166 /*********************************************************************
2167  *
2168  *  Setup the MSIX Queue Interrupt handlers: 
2169  *
2170  **********************************************************************/
2171 static int
2172 igb_allocate_msix(struct adapter *adapter)
2173 {
2174         device_t                dev = adapter->dev;
2175         struct igb_queue        *que = adapter->queues;
2176         int                     error, rid, vector = 0;
2177
2178
2179         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2180                 rid = vector + 1;
2181                 que->res = bus_alloc_resource_any(dev,
2182                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2183                 if (que->res == NULL) {
2184                         device_printf(dev,
2185                             "Unable to allocate bus resource: "
2186                             "MSIX Queue Interrupt\n");
2187                         return (ENXIO);
2188                 }
2189                 error = bus_setup_intr(dev, que->res,
2190                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2191                     igb_msix_que, que, &que->tag);
2192                 if (error) {
2193                         que->res = NULL;
2194                         device_printf(dev, "Failed to register Queue handler\n");
2195                         return (error);
2196                 }
2197                 que->msix = vector;
2198                 if (adapter->hw.mac.type == e1000_82575)
2199                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2200                 else
2201                         que->eims = 1 << vector;
2202                 /*
2203                 ** Bind the MSIX vector, and thus the
2204                 ** rings, to the corresponding CPU.
2205                 */
2206                 if (adapter->num_queues > 1)
2207                         bus_bind_intr(dev, que->res, i);
2208                 /* Make tasklet for deferred handling */
2209                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2210                 que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2211                     taskqueue_thread_enqueue, &que->tq);
2212                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2213                     device_get_nameunit(adapter->dev));
2214         }
2215
2216         /* And Link */
2217         rid = vector + 1;
2218         adapter->res = bus_alloc_resource_any(dev,
2219             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2220         if (adapter->res == NULL) {
2221                 device_printf(dev,
2222                     "Unable to allocate bus resource: "
2223                     "MSIX Link Interrupt\n");
2224                 return (ENXIO);
2225         }
2226         if ((error = bus_setup_intr(dev, adapter->res,
2227             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2228             igb_msix_link, adapter, &adapter->tag)) != 0) {
2229                 device_printf(dev, "Failed to register Link handler\n");
2230                 return (error);
2231         }
2232         adapter->linkvec = vector;
2233
2234         /* Make tasklet for deferred handling */
2235         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2236         adapter->tq = taskqueue_create_fast("igb_link", M_NOWAIT,
2237             taskqueue_thread_enqueue, &adapter->tq);
2238         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s link",
2239             device_get_nameunit(adapter->dev));
2240
2241         return (0);
2242 }
2243
2244
2245 static void
2246 igb_configure_queues(struct adapter *adapter)
2247 {
2248         struct  e1000_hw        *hw = &adapter->hw;
2249         struct  igb_queue       *que;
2250         u32                     tmp, ivar = 0;
2251         u32                     newitr = IGB_DEFAULT_ITR;
2252
2253         /* First turn on RSS capability */
2254         if (adapter->hw.mac.type > e1000_82575)
2255                 E1000_WRITE_REG(hw, E1000_GPIE,
2256                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2257                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2258
2259         /* Turn on MSIX */
2260         switch (adapter->hw.mac.type) {
2261         case e1000_82580:
2262                 /* RX entries */
2263                 for (int i = 0; i < adapter->num_queues; i++) {
2264                         u32 index = i >> 1;
2265                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2266                         que = &adapter->queues[i];
2267                         if (i & 1) {
2268                                 ivar &= 0xFF00FFFF;
2269                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2270                         } else {
2271                                 ivar &= 0xFFFFFF00;
2272                                 ivar |= que->msix | E1000_IVAR_VALID;
2273                         }
2274                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2275                 }
2276                 /* TX entries */
2277                 for (int i = 0; i < adapter->num_queues; i++) {
2278                         u32 index = i >> 1;
2279                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2280                         que = &adapter->queues[i];
2281                         if (i & 1) {
2282                                 ivar &= 0x00FFFFFF;
2283                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2284                         } else {
2285                                 ivar &= 0xFFFF00FF;
2286                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2287                         }
2288                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2289                         adapter->eims_mask |= que->eims;
2290                 }
2291
2292                 /* And for the link interrupt */
2293                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2294                 adapter->link_mask = 1 << adapter->linkvec;
2295                 adapter->eims_mask |= adapter->link_mask;
2296                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2297                 break;
2298         case e1000_82576:
2299                 /* RX entries */
2300                 for (int i = 0; i < adapter->num_queues; i++) {
2301                         u32 index = i & 0x7; /* Each IVAR has two entries */
2302                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2303                         que = &adapter->queues[i];
2304                         if (i < 8) {
2305                                 ivar &= 0xFFFFFF00;
2306                                 ivar |= que->msix | E1000_IVAR_VALID;
2307                         } else {
2308                                 ivar &= 0xFF00FFFF;
2309                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2310                         }
2311                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2312                         adapter->eims_mask |= que->eims;
2313                 }
2314                 /* TX entries */
2315                 for (int i = 0; i < adapter->num_queues; i++) {
2316                         u32 index = i & 0x7; /* Each IVAR has two entries */
2317                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2318                         que = &adapter->queues[i];
2319                         if (i < 8) {
2320                                 ivar &= 0xFFFF00FF;
2321                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2322                         } else {
2323                                 ivar &= 0x00FFFFFF;
2324                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2325                         }
2326                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2327                         adapter->eims_mask |= que->eims;
2328                 }
2329
2330                 /* And for the link interrupt */
2331                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2332                 adapter->link_mask = 1 << adapter->linkvec;
2333                 adapter->eims_mask |= adapter->link_mask;
2334                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2335                 break;
2336
2337         case e1000_82575:
2338                 /* enable MSI-X support*/
2339                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2340                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2341                 /* Auto-Mask interrupts upon ICR read. */
2342                 tmp |= E1000_CTRL_EXT_EIAME;
2343                 tmp |= E1000_CTRL_EXT_IRCA;
2344                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2345
2346                 /* Queues */
2347                 for (int i = 0; i < adapter->num_queues; i++) {
2348                         que = &adapter->queues[i];
2349                         tmp = E1000_EICR_RX_QUEUE0 << i;
2350                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2351                         que->eims = tmp;
2352                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2353                             i, que->eims);
2354                         adapter->eims_mask |= que->eims;
2355                 }
2356
2357                 /* Link */
2358                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2359                     E1000_EIMS_OTHER);
2360                 adapter->link_mask |= E1000_EIMS_OTHER;
2361                 adapter->eims_mask |= adapter->link_mask;
2362         default:
2363                 break;
2364         }
2365
2366         /* Set the starting interrupt rate */
2367         if (hw->mac.type == e1000_82575)
2368                 newitr |= newitr << 16;
2369         else
2370                 newitr |= E1000_EITR_CNT_IGNR;
2371
2372         for (int i = 0; i < adapter->num_queues; i++) {
2373                 que = &adapter->queues[i];
2374                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2375         }
2376
2377         return;
2378 }
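
/*
** Illustrative sketch only (kept out of the build): each 32-bit IVAR
** register packs four 8-bit vector entries; the read-modify-write in
** igb_configure_queues() above clears one byte lane and ORs in the
** vector with the VALID bit. Shift is 0, 8, 16 or 24 depending on the
** queue and direction. The helper name is hypothetical.
*/
#if 0
static u32
igb_example_ivar_set(u32 ivar, u32 msix, int shift)
{
        ivar &= ~((u32)0xFF << shift);          /* clear the byte lane */
        ivar |= (msix | E1000_IVAR_VALID) << shift;
        return (ivar);
}
#endif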
2379
2380
2381 static void
2382 igb_free_pci_resources(struct adapter *adapter)
2383 {
2384         struct          igb_queue *que = adapter->queues;
2385         device_t        dev = adapter->dev;
2386         int             rid;
2387
2388         /*
2389         ** There is a slight possibility of a failure mode
2390         ** in attach that will result in entering this function
2391         ** before interrupt resources have been initialized, and
2392         ** in that case we do not want to execute the loops below.
2393         ** We can detect this reliably by the state of the adapter's
2394         ** res pointer.
2395         */
2396         if (adapter->res == NULL)
2397                 goto mem;
2398
2399         /*
2400          * First release all the interrupt resources:
2401          */
2402         for (int i = 0; i < adapter->num_queues; i++, que++) {
2403                 rid = que->msix + 1;
2404                 if (que->tag != NULL) {
2405                         bus_teardown_intr(dev, que->res, que->tag);
2406                         que->tag = NULL;
2407                 }
2408                 if (que->res != NULL)
2409                         bus_release_resource(dev,
2410                             SYS_RES_IRQ, rid, que->res);
2411         }
2412
2413         /* Clean the Legacy or Link interrupt last */
2414         if (adapter->linkvec) /* we are doing MSIX */
2415                 rid = adapter->linkvec + 1;
2416         else
2417                 rid = (adapter->msix != 0) ? 1 : 0;
2418
2419         if (adapter->tag != NULL) {
2420                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2421                 adapter->tag = NULL;
2422         }
2423         if (adapter->res != NULL)
2424                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2425
2426 mem:
2427         if (adapter->msix)
2428                 pci_release_msi(dev);
2429
2430         if (adapter->msix_mem != NULL)
2431                 bus_release_resource(dev, SYS_RES_MEMORY,
2432                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2433
2434         if (adapter->pci_mem != NULL)
2435                 bus_release_resource(dev, SYS_RES_MEMORY,
2436                     PCIR_BAR(0), adapter->pci_mem);
2437
2438 }
2439
2440 /*
2441  * Setup Either MSI/X or MSI
2442  */
2443 static int
2444 igb_setup_msix(struct adapter *adapter)
2445 {
2446         device_t dev = adapter->dev;
2447         int rid, want, queues, msgs;
2448
2449         /* tuneable override */
2450         if (igb_enable_msix == 0)
2451                 goto msi;
2452
2453         /* First try MSI/X */
2454         rid = PCIR_BAR(IGB_MSIX_BAR);
2455         adapter->msix_mem = bus_alloc_resource_any(dev,
2456             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2457         if (!adapter->msix_mem) {
2458                 /* May not be enabled */
2459                 device_printf(adapter->dev,
2460                     "Unable to map MSIX table\n");
2461                 goto msi;
2462         }
2463
2464         msgs = pci_msix_count(dev); 
2465         if (msgs == 0) { /* system has msix disabled */
2466                 bus_release_resource(dev, SYS_RES_MEMORY,
2467                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2468                 adapter->msix_mem = NULL;
2469                 goto msi;
2470         }
2471
2472         /* Figure out a reasonable auto config value */
2473         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2474
2475         /* Manual override */
2476         if (igb_num_queues != 0)
2477                 queues = igb_num_queues;
2478
2479         /* Can have max of 4 queues on 82575 */
2480         if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2481                 queues = 4;
2482
2483         /*
2484         ** One vector (RX/TX pair) per queue
2485         ** plus an additional one for the Link interrupt
2486         */
2487         want = queues + 1;
2488         if (msgs >= want)
2489                 msgs = want;
2490         else {
2491                 device_printf(adapter->dev,
2492                     "MSIX Configuration Problem, "
2493                     "%d vectors configured, but %d queues wanted!\n",
2494                     msgs, want);
2495                 return (ENXIO);
2496         }
2497         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2498                 device_printf(adapter->dev,
2499                     "Using MSIX interrupts with %d vectors\n", msgs);
2500                 adapter->num_queues = queues;
2501                 return (msgs);
2502         }
2503 msi:
2504         msgs = pci_msi_count(dev);
2505         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2506                 device_printf(adapter->dev,"Using MSI interrupt\n");
2507         return (msgs);
2508 }
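
/*
** Illustrative sketch only (kept out of the build): the vector budget
** igb_setup_msix() computes above -- one RX/TX pair vector per queue
** plus one for link, clipped to the device's message count and to four
** queues on the 82575. The helper name is hypothetical.
*/
#if 0
static int
igb_example_vectors_wanted(int ncpus, int msgs, int is_82575)
{
        int queues = (ncpus > (msgs - 1)) ? (msgs - 1) : ncpus;

        if (is_82575 && (queues > 4))
                queues = 4;
        return (queues + 1);            /* queue vectors + link vector */
}
#endif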
2509
2510 /*********************************************************************
2511  *
2512  *  Set up a fresh starting state
2513  *
2514  **********************************************************************/
2515 static void
2516 igb_reset(struct adapter *adapter)
2517 {
2518         device_t        dev = adapter->dev;
2519         struct e1000_hw *hw = &adapter->hw;
2520         struct e1000_fc_info *fc = &hw->fc;
2521         struct ifnet    *ifp = adapter->ifp;
2522         u32             pba = 0;
2523         u16             hwm;
2524
2525         INIT_DEBUGOUT("igb_reset: begin");
2526
2527         /* Let the firmware know the OS is in control */
2528         igb_get_hw_control(adapter);
2529
2530         /*
2531          * Packet Buffer Allocation (PBA)
2532          * Writing PBA sets the receive portion of the buffer;
2533          * the remainder is used for the transmit buffer.
2534          */
2535         switch (hw->mac.type) {
2536         case e1000_82575:
2537                 pba = E1000_PBA_32K;
2538                 break;
2539         case e1000_82576:
2540                 pba = E1000_PBA_64K;
2541                 break;
2542         case e1000_82580:
2543                 pba = E1000_PBA_35K;
2544         default:
2545                 break;
2546         }
2547
2548         /* Special needs in case of Jumbo frames */
2549         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2550                 u32 tx_space, min_tx, min_rx;
2551                 pba = E1000_READ_REG(hw, E1000_PBA);
2552                 tx_space = pba >> 16;
2553                 pba &= 0xffff;
2554                 min_tx = (adapter->max_frame_size +
2555                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2556                 min_tx = roundup2(min_tx, 1024);
2557                 min_tx >>= 10;
2558                 min_rx = adapter->max_frame_size;
2559                 min_rx = roundup2(min_rx, 1024);
2560                 min_rx >>= 10;
2561                 if (tx_space < min_tx &&
2562                     ((min_tx - tx_space) < pba)) {
2563                         pba = pba - (min_tx - tx_space);
2564                         /*
2565                          * if short on rx space, rx wins
2566                          * and must trump tx adjustment
2567                          */
2568                         if (pba < min_rx)
2569                                 pba = min_rx;
2570                 }
2571                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2572         }
2573
2574         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2575
2576         /*
2577          * These parameters control the automatic generation (Tx) and
2578          * response (Rx) to Ethernet PAUSE frames.
2579          * - High water mark should allow for at least two frames to be
2580          *   received after sending an XOFF.
2581          * - Low water mark works best when it is very near the high water mark.
2582          *   This allows the receiver to restart by sending XON when it has
2583          *   drained a bit.
2584          */
2585         hwm = min(((pba << 10) * 9 / 10),
2586             ((pba << 10) - 2 * adapter->max_frame_size));
2587
2588         if (hw->mac.type < e1000_82576) {
2589                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2590                 fc->low_water = fc->high_water - 8;
2591         } else {
2592                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2593                 fc->low_water = fc->high_water - 16;
2594         }
2595
2596         fc->pause_time = IGB_FC_PAUSE_TIME;
2597         fc->send_xon = TRUE;
2598
2599         /* Set flow control, use the tunable value if sane */
2600         if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2601                 fc->requested_mode = igb_fc_setting;
2602         else
2603                 fc->requested_mode = e1000_fc_none;
2604
2605         fc->current_mode = fc->requested_mode;
2606
2607         /* Issue a global reset */
2608         e1000_reset_hw(hw);
2609         E1000_WRITE_REG(hw, E1000_WUC, 0);
2610
2611         if (e1000_init_hw(hw) < 0)
2612                 device_printf(dev, "Hardware Initialization Failed\n");
2613
2614         if (hw->mac.type == e1000_82580) {
2615                 u32 reg;
2616
2617                 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2618                 /*
2619                  * 0x80000000 - enable DMA COAL
2620                  * 0x10000000 - use L0s as low power
2621                  * 0x20000000 - use L1 as low power
2622                  * X << 16 - exit dma coal when rx data exceeds X kB
2623                  * Y - upper limit to stay in dma coal in units of 32usecs
2624                  */
2625                 E1000_WRITE_REG(hw, E1000_DMACR,
2626                     0xA0000006 | ((hwm << 6) & 0x00FF0000));
2627
2628                 /* set hwm to PBA -  2 * max frame size */
2629                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2630                 /*
2631                  * This sets the time to wait before requesting transition to
2632                  * low power state to number of usecs needed to receive 1 512
2633                  * byte frame at gigabit line rate
2634                  */
2635                 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2636
2637                 /* free space in tx packet buffer to wake from DMA coal */
2638                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2639                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2640
2641                 /* make low power state decision controlled by DMA coal */
2642                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2643                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2644                     reg | E1000_PCIEMISC_LX_DECISION);
2645         }
2646
2647         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2648         e1000_get_phy_info(hw);
2649         e1000_check_for_link(hw);
2650         return;
2651 }
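
/*
** Worked example of the watermark math in igb_reset() above, assuming
** an 82576 (64KB packet buffer, 16-byte granularity) and a 1522-byte
** max frame:
**   hwm        = min(65536 * 9 / 10, 65536 - 2 * 1522)
**              = min(58982, 62492) = 58982
**   high_water = 58982 & 0xFFF0 = 58976
**   low_water  = 58976 - 16    = 58960
*/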
2652
2653 /*********************************************************************
2654  *
2655  *  Setup networking device structure and register an interface.
2656  *
2657  **********************************************************************/
2658 static void
2659 igb_setup_interface(device_t dev, struct adapter *adapter)
2660 {
2661         struct ifnet   *ifp;
2662
2663         INIT_DEBUGOUT("igb_setup_interface: begin");
2664
2665         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2666         if (ifp == NULL)
2667                 panic("%s: can not if_alloc()", device_get_nameunit(dev));
2668         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2669         ifp->if_mtu = ETHERMTU;
2670         ifp->if_init =  igb_init;
2671         ifp->if_softc = adapter;
2672         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2673         ifp->if_ioctl = igb_ioctl;
2674         ifp->if_start = igb_start;
2675 #if __FreeBSD_version >= 800000
2676         ifp->if_transmit = igb_mq_start;
2677         ifp->if_qflush = igb_qflush;
2678 #endif
2679         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2680         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2681         IFQ_SET_READY(&ifp->if_snd);
2682
2683         ether_ifattach(ifp, adapter->hw.mac.addr);
2684
2685         ifp->if_capabilities = ifp->if_capenable = 0;
2686
2687         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2688         ifp->if_capabilities |= IFCAP_TSO4;
2689         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2690         if (igb_header_split)
2691                 ifp->if_capabilities |= IFCAP_LRO;
2692
2693         ifp->if_capenable = ifp->if_capabilities;
2694 #ifdef DEVICE_POLLING
2695         ifp->if_capabilities |= IFCAP_POLLING;
2696 #endif
2697
2698         /*
2699          * Tell the upper layer(s) we
2700          * support full VLAN capability.
2701          */
2702         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2703         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2704         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2705
2706         /*
2707         ** Don't turn this on by default: if vlans are
2708         ** created on another pseudo device (e.g. lagg)
2709         ** then vlan events are not passed through, breaking
2710         ** operation, but with HW FILTER off it works. If
2711         ** using vlans directly on the igb driver you can
2712         ** enable this and get full hardware tag filtering.
2713         */
2714         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2715
2716         /*
2717          * Specify the media types supported by this adapter and register
2718          * callbacks to update media and link information
2719          */
2720         ifmedia_init(&adapter->media, IFM_IMASK,
2721             igb_media_change, igb_media_status);
2722         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2723             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2724                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2725                             0, NULL);
2726                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2727         } else {
2728                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2729                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2730                             0, NULL);
2731                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2732                             0, NULL);
2733                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2734                             0, NULL);
2735                 if (adapter->hw.phy.type != e1000_phy_ife) {
2736                         ifmedia_add(&adapter->media,
2737                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2738                         ifmedia_add(&adapter->media,
2739                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2740                 }
2741         }
2742         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2743         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2744 }
2745
2746
2747 /*
2748  * Manage DMA'able memory.
2749  */
2750 static void
2751 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2752 {
2753         if (error)
2754                 return;
2755         *(bus_addr_t *) arg = segs[0].ds_addr;
2756 }
2757
2758 static int
2759 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2760         struct igb_dma_alloc *dma, int mapflags)
2761 {
2762         int error;
2763
2764         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2765                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2766                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2767                                 BUS_SPACE_MAXADDR,      /* highaddr */
2768                                 NULL, NULL,             /* filter, filterarg */
2769                                 size,                   /* maxsize */
2770                                 1,                      /* nsegments */
2771                                 size,                   /* maxsegsize */
2772                                 0,                      /* flags */
2773                                 NULL,                   /* lockfunc */
2774                                 NULL,                   /* lockarg */
2775                                 &dma->dma_tag);
2776         if (error) {
2777                 device_printf(adapter->dev,
2778                     "%s: bus_dma_tag_create failed: %d\n",
2779                     __func__, error);
2780                 goto fail_0;
2781         }
2782
2783         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2784             BUS_DMA_NOWAIT, &dma->dma_map);
2785         if (error) {
2786                 device_printf(adapter->dev,
2787                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2788                     __func__, (uintmax_t)size, error);
2789                 goto fail_2;
2790         }
2791
2792         dma->dma_paddr = 0;
2793         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2794             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2795         if (error || dma->dma_paddr == 0) {
2796                 device_printf(adapter->dev,
2797                     "%s: bus_dmamap_load failed: %d\n",
2798                     __func__, error);
2799                 goto fail_3;
2800         }
2801
2802         return (0);
2803
2804 fail_3:
2805         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2806 fail_2:
2807         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2808         bus_dma_tag_destroy(dma->dma_tag);
2809 fail_0:
2810         dma->dma_map = NULL;
2811         dma->dma_tag = NULL;
2812
2813         return (error);
2814 }
2815
2816 static void
2817 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2818 {
2819         if (dma->dma_tag == NULL)
2820                 return;
2821         if (dma->dma_map != NULL) {
2822                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2823                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2824                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2825                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2826                 dma->dma_map = NULL;
2827         }
2828         bus_dma_tag_destroy(dma->dma_tag);
2829         dma->dma_tag = NULL;
2830 }
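
/*
** Illustrative sketch only (kept out of the build): a minimal caller
** of the igb_dma_malloc()/igb_dma_free() pair above, mirroring how
** igb_allocate_queues() obtains descriptor-ring memory. The function
** name is hypothetical.
*/
#if 0
static int
igb_example_alloc_ring(struct adapter *adapter, struct igb_dma_alloc *dma,
    int ndesc)
{
        int tsize = roundup2(ndesc * sizeof(union e1000_adv_tx_desc),
            IGB_DBA_ALIGN);

        if (igb_dma_malloc(adapter, tsize, dma, BUS_DMA_NOWAIT))
                return (ENOMEM);
        bzero(dma->dma_vaddr, tsize);
        /* ... use dma->dma_vaddr / dma->dma_paddr ... */
        igb_dma_free(adapter, dma);
        return (0);
}
#endif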
2831
2832
2833 /*********************************************************************
2834  *
2835  *  Allocate memory for the transmit and receive rings, and then
2836  *  the descriptors associated with each, called only once at attach.
2837  *
2838  **********************************************************************/
2839 static int
2840 igb_allocate_queues(struct adapter *adapter)
2841 {
2842         device_t dev = adapter->dev;
2843         struct igb_queue        *que = NULL;
2844         struct tx_ring          *txr = NULL;
2845         struct rx_ring          *rxr = NULL;
2846         int rsize, tsize, error = E1000_SUCCESS;
2847         int txconf = 0, rxconf = 0;
2848
2849         /* First allocate the top level queue structs */
2850         if (!(adapter->queues =
2851             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2852             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2853                 device_printf(dev, "Unable to allocate queue memory\n");
2854                 error = ENOMEM;
2855                 goto fail;
2856         }
2857
2858         /* Next allocate the TX ring struct memory */
2859         if (!(adapter->tx_rings =
2860             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2861             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2862                 device_printf(dev, "Unable to allocate TX ring memory\n");
2863                 error = ENOMEM;
2864                 goto tx_fail;
2865         }
2866
2867         /* Now allocate the RX */
2868         if (!(adapter->rx_rings =
2869             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2870             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2871                 device_printf(dev, "Unable to allocate RX ring memory\n");
2872                 error = ENOMEM;
2873                 goto rx_fail;
2874         }
2875
2876         tsize = roundup2(adapter->num_tx_desc *
2877             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2878         /*
2879          * Now set up the TX queues; txconf counts how many rings
2880          * are fully set up, so that a failure midcourse can unwind
2881          * only the memory actually allocated.
2882          */
2883         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2884                 /* Set up some basics */
2885                 txr = &adapter->tx_rings[i];
2886                 txr->adapter = adapter;
2887                 txr->me = i;
2888
2889                 /* Initialize the TX lock */
2890                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2891                     device_get_nameunit(dev), txr->me);
2892                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2893
2894                 if (igb_dma_malloc(adapter, tsize,
2895                         &txr->txdma, BUS_DMA_NOWAIT)) {
2896                         device_printf(dev,
2897                             "Unable to allocate TX Descriptor memory\n");
2898                         error = ENOMEM;
2899                         goto err_tx_desc;
2900                 }
2901                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2902                 bzero((void *)txr->tx_base, tsize);
2903
2904                 /* Now allocate transmit buffers for the ring */
2905                 if (igb_allocate_transmit_buffers(txr)) {
2906                         device_printf(dev,
2907                             "Critical Failure setting up transmit buffers\n");
2908                         error = ENOMEM;
2909                         goto err_tx_desc;
2910                 }
2911 #if __FreeBSD_version >= 800000
2912                 /* Allocate a buf ring */
2913                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2914                     M_WAITOK, &txr->tx_mtx);
2915 #endif
2916         }
2917
2918         /*
2919          * Next the RX queues...
2920          */ 
2921         rsize = roundup2(adapter->num_rx_desc *
2922             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2923         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2924                 rxr = &adapter->rx_rings[i];
2925                 rxr->adapter = adapter;
2926                 rxr->me = i;
2927
2928                 /* Initialize the RX lock */
2929                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2930                     device_get_nameunit(dev), rxr->me);
2931                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2932
2933                 if (igb_dma_malloc(adapter, rsize,
2934                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2935                         device_printf(dev,
2936                             "Unable to allocate RX Descriptor memory\n");
2937                         error = ENOMEM;
2938                         goto err_rx_desc;
2939                 }
2940                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2941                 bzero((void *)rxr->rx_base, rsize);
2942
2943                 /* Allocate receive buffers for the ring */
2944                 if (igb_allocate_receive_buffers(rxr)) {
2945                         device_printf(dev,
2946                             "Critical Failure setting up receive buffers\n");
2947                         error = ENOMEM;
2948                         goto err_rx_desc;
2949                 }
2950         }
2951
2952         /*
2953         ** Finally set up the queue holding structs
2954         */
2955         for (int i = 0; i < adapter->num_queues; i++) {
2956                 que = &adapter->queues[i];
2957                 que->adapter = adapter;
2958                 que->txr = &adapter->tx_rings[i];
2959                 que->rxr = &adapter->rx_rings[i];
2960         }
2961
2962         return (0);
2963
2964 err_rx_desc:
2965         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2966                 igb_dma_free(adapter, &rxr->rxdma);
2967 err_tx_desc:
2968         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
2969                 igb_dma_free(adapter, &txr->txdma);
#if __FreeBSD_version >= 800000
                if (txr->br != NULL)
                        buf_ring_free(txr->br, M_DEVBUF);
#endif
        }
2970         free(adapter->rx_rings, M_DEVBUF);
2971 rx_fail:
2973         free(adapter->tx_rings, M_DEVBUF);
2974 tx_fail:
2975         free(adapter->queues, M_DEVBUF);
2976 fail:
2977         return (error);
2978 }
2979
2980 /*********************************************************************
2981  *
2982  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2983  *  the information needed to transmit a packet on the wire. This is
2984  *  called only once at attach, setup is done every reset.
2985  *
2986  **********************************************************************/
2987 static int
2988 igb_allocate_transmit_buffers(struct tx_ring *txr)
2989 {
2990         struct adapter *adapter = txr->adapter;
2991         device_t dev = adapter->dev;
2992         struct igb_tx_buffer *txbuf;
2993         int error, i;
2994
2995         /*
2996          * Setup DMA descriptor areas.
2997          */
2998         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2999                                1, 0,                    /* alignment, bounds */
3000                                BUS_SPACE_MAXADDR,       /* lowaddr */
3001                                BUS_SPACE_MAXADDR,       /* highaddr */
3002                                NULL, NULL,              /* filter, filterarg */
3003                                IGB_TSO_SIZE,            /* maxsize */
3004                                IGB_MAX_SCATTER,         /* nsegments */
3005                                PAGE_SIZE,               /* maxsegsize */
3006                                0,                       /* flags */
3007                                NULL,                    /* lockfunc */
3008                                NULL,                    /* lockfuncarg */
3009                                &txr->txtag))) {
3010                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3011                 goto fail;
3012         }
3013
3014         if (!(txr->tx_buffers =
3015             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3016             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3017                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3018                 error = ENOMEM;
3019                 goto fail;
3020         }
3021
3022         /* Create the descriptor buffer dma maps */
3023         txbuf = txr->tx_buffers;
3024         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3025                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3026                 if (error != 0) {
3027                         device_printf(dev, "Unable to create TX DMA map\n");
3028                         goto fail;
3029                 }
3030         }
3031
3032         return 0;
3033 fail:
3034         /* We free all, it handles case where we are in the middle */
3035         igb_free_transmit_structures(adapter);
3036         return (error);
3037 }
3038
3039 /*********************************************************************
3040  *
3041  *  Initialize a transmit ring.
3042  *
3043  **********************************************************************/
3044 static void
3045 igb_setup_transmit_ring(struct tx_ring *txr)
3046 {
3047         struct adapter *adapter = txr->adapter;
3048         struct igb_tx_buffer *txbuf;
3049         int i;
3050
3051         /* Clear the old descriptor contents */
3052         IGB_TX_LOCK(txr);
3053         bzero((void *)txr->tx_base,
3054               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3055         /* Reset indices */
3056         txr->next_avail_desc = 0;
3057         txr->next_to_clean = 0;
3058
3059         /* Free any existing tx buffers. */
3060         txbuf = txr->tx_buffers;
3061         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3062                 if (txbuf->m_head != NULL) {
3063                         bus_dmamap_sync(txr->txtag, txbuf->map,
3064                             BUS_DMASYNC_POSTWRITE);
3065                         bus_dmamap_unload(txr->txtag, txbuf->map);
3066                         m_freem(txbuf->m_head);
3067                         txbuf->m_head = NULL;
3068                 }
3069                 /* clear the watch index */
3070                 txbuf->next_eop = -1;
3071         }
3072
3073         /* Set number of descriptors available */
3074         txr->tx_avail = adapter->num_tx_desc;
3075
3076         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3077             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3078         IGB_TX_UNLOCK(txr);
3079 }
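/*
** Note on the sentinel used above: next_eop = -1 marks a slot that
** holds no packet. The transmit path records each packet's EOP slot
** in its first buffer, and igb_txeof only follows chains whose
** next_eop has been set, so resetting every slot here parks the
** cleaner until new work is queued.
*/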
3080
3081 /*********************************************************************
3082  *
3083  *  Initialize all transmit rings.
3084  *
3085  **********************************************************************/
3086 static void
3087 igb_setup_transmit_structures(struct adapter *adapter)
3088 {
3089         struct tx_ring *txr = adapter->tx_rings;
3090
3091         for (int i = 0; i < adapter->num_queues; i++, txr++)
3092                 igb_setup_transmit_ring(txr);
3093
3094         return;
3095 }
3096
3097 /*********************************************************************
3098  *
3099  *  Enable transmit unit.
3100  *
3101  **********************************************************************/
3102 static void
3103 igb_initialize_transmit_units(struct adapter *adapter)
3104 {
3105         struct tx_ring  *txr = adapter->tx_rings;
3106         struct e1000_hw *hw = &adapter->hw;
3107         u32             tctl, txdctl;
3108
3109         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3110
3111         /* Setup the Tx Descriptor Rings */
3112         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3113                 u64 bus_addr = txr->txdma.dma_paddr;
3114
3115                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3116                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3117                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3118                     (uint32_t)(bus_addr >> 32));
3119                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3120                     (uint32_t)bus_addr);
3121
3122                 /* Setup the HW Tx Head and Tail descriptor pointers */
3123                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3124                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3125
3126                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3127                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3128                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3129
3130                 txr->watchdog_check = FALSE;
3131
3132                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3133                 txdctl |= IGB_TX_PTHRESH;
3134                 txdctl |= IGB_TX_HTHRESH << 8;
3135                 txdctl |= IGB_TX_WTHRESH << 16;
3136                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3137                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3138         }
3139
3140         /* Program the Transmit Control Register */
3141         tctl = E1000_READ_REG(hw, E1000_TCTL);
3142         tctl &= ~E1000_TCTL_CT;
3143         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3144                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3145
3146         e1000_config_collision_dist(hw);
3147
3148         /* This write will effectively turn on the transmit unit. */
3149         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3150 }
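/*
** Illustrative layout of the TXDCTL value programmed above, per the
** 82575/82576 register layout (the IGB_TX_*THRESH values come from
** if_igb.h and are assumed, not shown in this section):
**
**      bits  5:0   PTHRESH   prefetch threshold
**      bits 13:8   HTHRESH   host threshold       (<< 8)
**      bits 21:16  WTHRESH   write-back threshold (<< 16)
**      bit  25     E1000_TXDCTL_QUEUE_ENABLE
*/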
3151
3152 /*********************************************************************
3153  *
3154  *  Free all transmit rings.
3155  *
3156  **********************************************************************/
3157 static void
3158 igb_free_transmit_structures(struct adapter *adapter)
3159 {
3160         struct tx_ring *txr = adapter->tx_rings;
3161
3162         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3163                 IGB_TX_LOCK(txr);
3164                 igb_free_transmit_buffers(txr);
3165                 igb_dma_free(adapter, &txr->txdma);
3166                 IGB_TX_UNLOCK(txr);
3167                 IGB_TX_LOCK_DESTROY(txr);
3168         }
3169         free(adapter->tx_rings, M_DEVBUF);
3170 }
3171
3172 /*********************************************************************
3173  *
3174  *  Free transmit ring related data structures.
3175  *
3176  **********************************************************************/
3177 static void
3178 igb_free_transmit_buffers(struct tx_ring *txr)
3179 {
3180         struct adapter *adapter = txr->adapter;
3181         struct igb_tx_buffer *tx_buffer;
3182         int             i;
3183
3184         INIT_DEBUGOUT("free_transmit_buffers: begin");
3185
3186         if (txr->tx_buffers == NULL)
3187                 return;
3188
3189         tx_buffer = txr->tx_buffers;
3190         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3191                 if (tx_buffer->m_head != NULL) {
3192                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3193                             BUS_DMASYNC_POSTWRITE);
3194                         bus_dmamap_unload(txr->txtag,
3195                             tx_buffer->map);
3196                         m_freem(tx_buffer->m_head);
3197                         tx_buffer->m_head = NULL;
3198                         if (tx_buffer->map != NULL) {
3199                                 bus_dmamap_destroy(txr->txtag,
3200                                     tx_buffer->map);
3201                                 tx_buffer->map = NULL;
3202                         }
3203                 } else if (tx_buffer->map != NULL) {
3204                         bus_dmamap_unload(txr->txtag,
3205                             tx_buffer->map);
3206                         bus_dmamap_destroy(txr->txtag,
3207                             tx_buffer->map);
3208                         tx_buffer->map = NULL;
3209                 }
3210         }
3211 #if __FreeBSD_version >= 800000
3212         if (txr->br != NULL)
3213                 buf_ring_free(txr->br, M_DEVBUF);
3214 #endif
3215         if (txr->tx_buffers != NULL) {
3216                 free(txr->tx_buffers, M_DEVBUF);
3217                 txr->tx_buffers = NULL;
3218         }
3219         if (txr->txtag != NULL) {
3220                 bus_dma_tag_destroy(txr->txtag);
3221                 txr->txtag = NULL;
3222         }
3223         return;
3224 }
3225
3226 /**********************************************************************
3227  *
3228  *  Setup work for hardware segmentation offload (TSO)
3229  *
3230  **********************************************************************/
3231 static boolean_t
3232 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3233 {
3234         struct adapter *adapter = txr->adapter;
3235         struct e1000_adv_tx_context_desc *TXD;
3236         struct igb_tx_buffer        *tx_buffer;
3237         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3238         u32 mss_l4len_idx = 0;
3239         u16 vtag = 0;
3240         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3241         struct ether_vlan_header *eh;
3242         struct ip *ip;
3243         struct tcphdr *th;
3244
3246         /*
3247          * Determine where frame payload starts.
3248          * Jump over vlan headers if already present
3249          */
3250         eh = mtod(mp, struct ether_vlan_header *);
3251         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3252                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3253         else
3254                 ehdrlen = ETHER_HDR_LEN;
3255
3256         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3257         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3258                 return FALSE;
3259
3260         /* Only supports IPV4 for now */
3261         ctxd = txr->next_avail_desc;
3262         tx_buffer = &txr->tx_buffers[ctxd];
3263         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3264
3265         ip = (struct ip *)(mp->m_data + ehdrlen);
3266         if (ip->ip_p != IPPROTO_TCP)
3267                 return FALSE;   /* not TCP */
3268         ip->ip_sum = 0;
3269         ip_hlen = ip->ip_hl << 2;
3270         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3271         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3272             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3273         tcp_hlen = th->th_off << 2;
3274         /*
3275          * Calculate header length, this is used
3276          * in the transmit desc in igb_xmit
3277          */
3278         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3279
3280         /* VLAN MACLEN IPLEN */
3281         if (mp->m_flags & M_VLANTAG) {
3282                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3283                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3284         }
3285
3286         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3287         vlan_macip_lens |= ip_hlen;
3288         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3289
3290         /* ADV DTYPE TUCMD */
3291         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3292         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3293         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3294         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3295
3296         /* MSS L4LEN IDX */
3297         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3298         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3299         /* 82575 needs the queue index added */
3300         if (adapter->hw.mac.type == e1000_82575)
3301                 mss_l4len_idx |= txr->me << 4;
3302         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3303
3304         TXD->seqnum_seed = htole32(0);
3305         tx_buffer->m_head = NULL;
3306         tx_buffer->next_eop = -1;
3307
3308         if (++ctxd == adapter->num_tx_desc)
3309                 ctxd = 0;
3310
3311         txr->tx_avail--;
3312         txr->next_avail_desc = ctxd;
3313         return TRUE;
3314 }
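/*
** Worked example for the hdrlen computed above (illustrative): an
** untagged frame with minimal IPv4 and TCP headers gives
**
**      ehdrlen (14) + ip_hlen (20) + tcp_hlen (20) = 54 bytes
**
** which the transmit path then encodes in the first data descriptor
** so the hardware knows where the TSO payload begins.
*/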
3315
3316
3317 /*********************************************************************
3318  *
3319  *  Context Descriptor setup for VLAN or CSUM
3320  *
3321  **********************************************************************/
3322
3323 static bool
3324 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3325 {
3326         struct adapter *adapter = txr->adapter;
3327         struct e1000_adv_tx_context_desc *TXD;
3328         struct igb_tx_buffer        *tx_buffer;
3329         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3330         struct ether_vlan_header *eh;
3331         struct ip *ip = NULL;
3332         struct ip6_hdr *ip6;
3333         int  ehdrlen, ctxd, ip_hlen = 0;
3334         u16     etype, vtag = 0;
3335         u8      ipproto = 0;
3336         bool    offload = TRUE;
3337
3338         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3339                 offload = FALSE;
3340
3341         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3342         ctxd = txr->next_avail_desc;
3343         tx_buffer = &txr->tx_buffers[ctxd];
3344         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3345
3346         /*
3347         ** In advanced descriptors the vlan tag must 
3348         ** be placed into the context descriptor, thus
3349         ** we need to be here just for that setup.
3350         */
3351         if (mp->m_flags & M_VLANTAG) {
3352                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3353                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3354         } else if (offload == FALSE)
3355                 return FALSE;
3356
3357         /*
3358          * Determine where frame payload starts.
3359          * Jump over vlan headers if already present,
3360          * helpful for QinQ too.
3361          */
3362         eh = mtod(mp, struct ether_vlan_header *);
3363         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3364                 etype = ntohs(eh->evl_proto);
3365                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3366         } else {
3367                 etype = ntohs(eh->evl_encap_proto);
3368                 ehdrlen = ETHER_HDR_LEN;
3369         }
3370
3371         /* Set the ether header length */
3372         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3373
3374         switch (etype) {
3375                 case ETHERTYPE_IP:
3376                         ip = (struct ip *)(mp->m_data + ehdrlen);
3377                         ip_hlen = ip->ip_hl << 2;
3378                         if (mp->m_len < ehdrlen + ip_hlen) {
3379                                 offload = FALSE;
3380                                 break;
3381                         }
3382                         ipproto = ip->ip_p;
3383                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3384                         break;
3385                 case ETHERTYPE_IPV6:
3386                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3387                         ip_hlen = sizeof(struct ip6_hdr);
3388                         if (mp->m_len < ehdrlen + ip_hlen)
3389                                 return (FALSE);
3390                         ipproto = ip6->ip6_nxt;
3391                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3392                         break;
3393                 default:
3394                         offload = FALSE;
3395                         break;
3396         }
3397
3398         vlan_macip_lens |= ip_hlen;
3399         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3400
3401         switch (ipproto) {
3402                 case IPPROTO_TCP:
3403                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3404                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3405                         break;
3406                 case IPPROTO_UDP:
3407                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3408                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3409                         break;
3410 #if __FreeBSD_version >= 800000
3411                 case IPPROTO_SCTP:
3412                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3413                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3414                         break;
3415 #endif
3416                 default:
3417                         offload = FALSE;
3418                         break;
3419         }
3420
3421         /* 82575 needs the queue index added */
3422         if (adapter->hw.mac.type == e1000_82575)
3423                 mss_l4len_idx = txr->me << 4;
3424
3425         /* Now copy bits into descriptor */
3426         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3427         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3428         TXD->seqnum_seed = htole32(0);
3429         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3430
3431         tx_buffer->m_head = NULL;
3432         tx_buffer->next_eop = -1;
3433
3434         /* We've consumed the first desc, adjust counters */
3435         if (++ctxd == adapter->num_tx_desc)
3436                 ctxd = 0;
3437         txr->next_avail_desc = ctxd;
3438         --txr->tx_avail;
3439
3440         return (offload);
3441 }
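/*
** Return-value sketch, as implied by the logic above: TRUE means the
** data descriptors should carry the checksum-offload status bits,
** FALSE means they should not. Note that a context descriptor may
** still be consumed on the FALSE path when only a VLAN tag had to
** be recorded.
*/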
3442
3443
3444 /**********************************************************************
3445  *
3446  *  Examine each tx_buffer in the used queue. If the hardware is done
3447  *  processing the packet then free associated resources. The
3448  *  tx_buffer is put back on the free queue.
3449  *
3450  *  TRUE return means there's work in the ring to clean, FALSE it's empty.
3451  **********************************************************************/
3452 static bool
3453 igb_txeof(struct tx_ring *txr)
3454 {
3455         struct adapter  *adapter = txr->adapter;
3456         int first, last, done;
3457         struct igb_tx_buffer *tx_buffer;
3458         struct e1000_tx_desc   *tx_desc, *eop_desc;
3459         struct ifnet   *ifp = adapter->ifp;
3460
3461         IGB_TX_LOCK_ASSERT(txr);
3462
3463         if (txr->tx_avail == adapter->num_tx_desc)
3464                 return FALSE;
3465
3466         first = txr->next_to_clean;
3467         tx_desc = &txr->tx_base[first];
3468         tx_buffer = &txr->tx_buffers[first];
3469         last = tx_buffer->next_eop;
3470         eop_desc = &txr->tx_base[last];
3471
3472         /*
3473          * Get the index of the first descriptor AFTER the EOP
3474          * of the first packet, so the inner while loop can use
3475          * a simple comparison.
3476          */
3478         if (++last == adapter->num_tx_desc)
3479                 last = 0;
3480         done = last;
3481
3482         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3483             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3484
3485         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3486                 /* We clean the range of the packet */
3487                 while (first != done) {
3488                         tx_desc->upper.data = 0;
3489                         tx_desc->lower.data = 0;
3490                         tx_desc->buffer_addr = 0;
3491                         ++txr->tx_avail;
3492
3493                         if (tx_buffer->m_head) {
3494                                 txr->bytes +=
3495                                     tx_buffer->m_head->m_pkthdr.len;
3496                                 bus_dmamap_sync(txr->txtag,
3497                                     tx_buffer->map,
3498                                     BUS_DMASYNC_POSTWRITE);
3499                                 bus_dmamap_unload(txr->txtag,
3500                                     tx_buffer->map);
3501
3502                                 m_freem(tx_buffer->m_head);
3503                                 tx_buffer->m_head = NULL;
3504                         }
3505                         tx_buffer->next_eop = -1;
3506                         txr->watchdog_time = ticks;
3507
3508                         if (++first == adapter->num_tx_desc)
3509                                 first = 0;
3510
3511                         tx_buffer = &txr->tx_buffers[first];
3512                         tx_desc = &txr->tx_base[first];
3513                 }
3514                 ++txr->packets;
3515                 ++ifp->if_opackets;
3516                 /* See if we can continue to the next packet */
3517                 last = tx_buffer->next_eop;
3518                 if (last != -1) {
3519                         eop_desc = &txr->tx_base[last];
3520                         /* Get new done point */
3521                         if (++last == adapter->num_tx_desc) last = 0;
3522                         done = last;
3523                 } else
3524                         break;
3525         }
3526         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3527             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3528
3529         txr->next_to_clean = first;
3530
3531         /*
3532          * If we have enough room, clear IFF_DRV_OACTIVE
3533          * to tell the stack that it is OK to send packets.
3534          */
3535         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3536                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3537                 /* All clean, turn off the watchdog */
3538                 if (txr->tx_avail == adapter->num_tx_desc) {
3539                         txr->watchdog_check = FALSE;
3540                         return FALSE;
3541                 }
3542         }
3543
3544         return (TRUE);
3545 }
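/*
** Illustrative wrap arithmetic for the clean loop above: with, say,
** 1024 descriptors, a packet whose EOP sits in slot 1023 yields
** done = (1023 + 1) % 1024 = 0, so the inner loop frees slots
** first..1023 and stops as soon as 'first' wraps around to 0.
*/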
3546
3547
3548 /*********************************************************************
3549  *
3550  *  Refresh mbuf buffers for RX descriptor rings
3551  *   - keeps its own state, so discards due to resource exhaustion
3552  *     are unnecessary: if an mbuf cannot be obtained the routine
3553  *     just returns, keeping its placeholder, and can simply be
3554  *     called again later to retry.
3555  *
3556  **********************************************************************/
3557 static void
3558 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3559 {
3560         struct adapter          *adapter = rxr->adapter;
3561         bus_dma_segment_t       hseg[1];
3562         bus_dma_segment_t       pseg[1];
3563         struct igb_rx_buf       *rxbuf;
3564         struct mbuf             *mh, *mp;
3565         int                     i, nsegs, error, cleaned;
3566
3567         i = rxr->next_to_refresh;
3568         cleaned = -1; /* Signify no completions */
3569         while (i != limit) {
3570                 rxbuf = &rxr->rx_buffers[i];
3571                 if (rxbuf->m_head == NULL) {
3572                         mh = m_gethdr(M_DONTWAIT, MT_DATA);
3573                         if (mh == NULL)
3574                                 goto update;
3575                         mh->m_pkthdr.len = mh->m_len = MHLEN;
3576                         /* M_PKTHDR is already set by m_gethdr() */
3577                         m_adj(mh, ETHER_ALIGN);
3579                         /* Get the memory mapping */
3580                         error = bus_dmamap_load_mbuf_sg(rxr->htag,
3581                             rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3582                         if (error != 0) {
3583                                 printf("GET BUF: dmamap load"
3584                                     " failure - %d\n", error);
3585                                 m_free(mh);
3586                                 goto update;
3587                         }
3588                         rxbuf->m_head = mh;
3589                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3590                             BUS_DMASYNC_PREREAD);
3591                         rxr->rx_base[i].read.hdr_addr =
3592                             htole64(hseg[0].ds_addr);
3593                 }
3594
3595                 if (rxbuf->m_pack == NULL) {
3596                         mp = m_getjcl(M_DONTWAIT, MT_DATA,
3597                             M_PKTHDR, adapter->rx_mbuf_sz);
3598                         if (mp == NULL)
3599                                 goto update;
3600                         mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3601                         /* Get the memory mapping */
3602                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3603                             rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3604                         if (error != 0) {
3605                                 printf("GET BUF: dmamap load"
3606                                     " failure - %d\n", error);
3607                                 m_free(mp);
3608                                 goto update;
3609                         }
3610                         rxbuf->m_pack = mp;
3611                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3612                             BUS_DMASYNC_PREREAD);
3613                         rxr->rx_base[i].read.pkt_addr =
3614                             htole64(pseg[0].ds_addr);
3615                 }
3616
3617                 cleaned = i;
3618                 /* Calculate next index */
3619                 if (++i == adapter->num_rx_desc)
3620                         i = 0;
3621                 /* This is the work marker for refresh */
3622                 rxr->next_to_refresh = i;
3623         }
3624 update:
3625         if (cleaned != -1) /* If we refreshed some, bump tail */
3626                 E1000_WRITE_REG(&adapter->hw,
3627                     E1000_RDT(rxr->me), cleaned);
3628         return;
3629 }
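/*
** Tail-update sketch: RDT is bumped only when at least one slot was
** refreshed, and it is written with the index of the last refreshed
** slot, handing descriptors up to that point back to the hardware;
** next_to_refresh remembers where software resumes on the next call.
*/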
3630
3631
3632 /*********************************************************************
3633  *
3634  *  Allocate memory for rx_buffer structures. Since we use one
3635  *  rx_buffer per received packet, the maximum number of rx_buffer's
3636  *  that we'll need is equal to the number of receive descriptors
3637  *  that we've allocated.
3638  *
3639  **********************************************************************/
3640 static int
3641 igb_allocate_receive_buffers(struct rx_ring *rxr)
3642 {
3643         struct  adapter         *adapter = rxr->adapter;
3644         device_t                dev = adapter->dev;
3645         struct igb_rx_buf       *rxbuf;
3646         int                     i, bsize, error;
3647
3648         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3649         if (!(rxr->rx_buffers =
3650             (struct igb_rx_buf *) malloc(bsize,
3651             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3652                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3653                 error = ENOMEM;
3654                 goto fail;
3655         }
3656
3657         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3658                                    1, 0,                /* alignment, bounds */
3659                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3660                                    BUS_SPACE_MAXADDR,   /* highaddr */
3661                                    NULL, NULL,          /* filter, filterarg */
3662                                    MSIZE,               /* maxsize */
3663                                    1,                   /* nsegments */
3664                                    MSIZE,               /* maxsegsize */
3665                                    0,                   /* flags */
3666                                    NULL,                /* lockfunc */
3667                                    NULL,                /* lockfuncarg */
3668                                    &rxr->htag))) {
3669                 device_printf(dev, "Unable to create RX DMA tag\n");
3670                 goto fail;
3671         }
3672
3673         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3674                                    1, 0,                /* alignment, bounds */
3675                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3676                                    BUS_SPACE_MAXADDR,   /* highaddr */
3677                                    NULL, NULL,          /* filter, filterarg */
3678                                    MJUMPAGESIZE,        /* maxsize */
3679                                    1,                   /* nsegments */
3680                                    MJUMPAGESIZE,        /* maxsegsize */
3681                                    0,                   /* flags */
3682                                    NULL,                /* lockfunc */
3683                                    NULL,                /* lockfuncarg */
3684                                    &rxr->ptag))) {
3685                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3686                 goto fail;
3687         }
3688
3689         for (i = 0; i < adapter->num_rx_desc; i++) {
3690                 rxbuf = &rxr->rx_buffers[i];
3691                 error = bus_dmamap_create(rxr->htag,
3692                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3693                 if (error) {
3694                         device_printf(dev,
3695                             "Unable to create RX head DMA maps\n");
3696                         goto fail;
3697                 }
3698                 error = bus_dmamap_create(rxr->ptag,
3699                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3700                 if (error) {
3701                         device_printf(dev,
3702                             "Unable to create RX packet DMA maps\n");
3703                         goto fail;
3704                 }
3705         }
3706
3707         return (0);
3708
3709 fail:
3710         /* Frees all, but can handle partial completion */
3711         igb_free_receive_structures(adapter);
3712         return (error);
3713 }
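/*
** Design note on the two tags created above: header-split RX DMAs
** each frame into two differently sized buffers, an MSIZE mbuf for
** the header (htag/hmap) and an MJUMPAGESIZE cluster for the payload
** (ptag/pmap), so each side gets a tag with its own maxsegsize.
*/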
3714
3715
3716 static void
3717 igb_free_receive_ring(struct rx_ring *rxr)
3718 {
3719         struct  adapter         *adapter;
3720         struct igb_rx_buf       *rxbuf;
3721         int i;
3722
3723         adapter = rxr->adapter;
3724         for (i = 0; i < adapter->num_rx_desc; i++) {
3725                 rxbuf = &rxr->rx_buffers[i];
3726                 if (rxbuf->m_head != NULL) {
3727                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3728                             BUS_DMASYNC_POSTREAD);
3729                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3730                         rxbuf->m_head->m_flags |= M_PKTHDR;
3731                         m_freem(rxbuf->m_head);
3732                 }
3733                 if (rxbuf->m_pack != NULL) {
3734                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3735                             BUS_DMASYNC_POSTREAD);
3736                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3737                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3738                         m_freem(rxbuf->m_pack);
3739                 }
3740                 rxbuf->m_head = NULL;
3741                 rxbuf->m_pack = NULL;
3742         }
3743 }
3744
3745
3746 /*********************************************************************
3747  *
3748  *  Initialize a receive ring and its buffers.
3749  *
3750  **********************************************************************/
3751 static int
3752 igb_setup_receive_ring(struct rx_ring *rxr)
3753 {
3754         struct  adapter         *adapter;
3755         struct  ifnet           *ifp;
3756         device_t                dev;
3757         struct igb_rx_buf       *rxbuf;
3758         bus_dma_segment_t       pseg[1], hseg[1];
3759         struct lro_ctrl         *lro = &rxr->lro;
3760         int                     rsize, nsegs, error = 0;
3761
3762         adapter = rxr->adapter;
3763         dev = adapter->dev;
3764         ifp = adapter->ifp;
3765
3766         /* Clear the ring contents */
3767         IGB_RX_LOCK(rxr);
3768         rsize = roundup2(adapter->num_rx_desc *
3769             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3770         bzero((void *)rxr->rx_base, rsize);
3771
3772         /*
3773         ** Free current RX buffer structures and their mbufs
3774         */
3775         igb_free_receive_ring(rxr);
3776
3777         /* Now replenish the ring mbufs */
3778         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3779                 struct mbuf     *mh, *mp;
3780
3781                 rxbuf = &rxr->rx_buffers[j];
3782
3783                 /* First the header */
3784                 rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3785                 if (rxbuf->m_head == NULL)
3786                         goto fail;
3787                 mh = rxbuf->m_head;
3788                 mh->m_len = mh->m_pkthdr.len = MHLEN;
3789                 /* M_PKTHDR is already set by m_gethdr() */
3790                 m_adj(mh, ETHER_ALIGN);
3791                 /* Get the memory mapping */
3792                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
3793                     rxbuf->hmap, rxbuf->m_head, hseg,
3794                     &nsegs, BUS_DMA_NOWAIT);
3795                 if (error != 0) /* Nothing elegant to do here */
3796                         goto fail;
3797                 bus_dmamap_sync(rxr->htag,
3798                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
3799                 /* Update descriptor */
3800                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3801
3802                 /* Now the payload cluster */
3803                 rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3804                     M_PKTHDR, adapter->rx_mbuf_sz);
3805                 if (rxbuf->m_pack == NULL)
3806                         goto fail;
3807                 mp = rxbuf->m_pack;
3808                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3809                 /* Get the memory mapping */
3810                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3811                     rxbuf->pmap, mp, pseg,
3812                     &nsegs, BUS_DMA_NOWAIT);
3813                 if (error != 0)
3814                         goto fail;
3815                 bus_dmamap_sync(rxr->ptag,
3816                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
3817                 /* Update descriptor */
3818                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3819         }
3820
3821         /* Setup our descriptor indices */
3822         rxr->next_to_check = 0;
3823         rxr->next_to_refresh = 0;
3824         rxr->lro_enabled = FALSE;
3825
3826         if (igb_header_split)
3827                 rxr->hdr_split = TRUE;
3828         else
3829                 ifp->if_capabilities &= ~IFCAP_LRO;
3830
3831         rxr->fmp = NULL;
3832         rxr->lmp = NULL;
3833         rxr->discard = FALSE;
3834
3835         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3836             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3837
3838         /*
3839         ** Now set up the LRO interface.  Header split
3840         ** is only done when LRO is enabled, since the
3841         ** two are typically wanted (or not) together
3842         ** in the same setups.
3843         */
3844         if (ifp->if_capenable & IFCAP_LRO) {
3845                 int err = tcp_lro_init(lro);
3846                 if (err) {
3847                         device_printf(dev, "LRO Initialization failed!\n");
3848                         goto fail;
3849                 }
3850                 INIT_DEBUGOUT("RX LRO Initialized\n");
3851                 rxr->lro_enabled = TRUE;
3852                 lro->ifp = adapter->ifp;
3853         }
3854
3855         IGB_RX_UNLOCK(rxr);
3856         return (0);
3857
3858 fail:
3859         igb_free_receive_ring(rxr);
3860         IGB_RX_UNLOCK(rxr);
3861         return (error);
3862 }
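/*
** Descriptor-programming sketch for the loop above: in the advanced
** RX descriptor's 'read' (software-to-hardware) format, hdr_addr and
** pkt_addr carry the two DMA addresses used for header split, and on
** completion the hardware overwrites those same bytes with the 'wb'
** write-back format, which is why igb_rxeof reads the cur->wb fields.
*/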
3863
3864 /*********************************************************************
3865  *
3866  *  Initialize all receive rings.
3867  *
3868  **********************************************************************/
3869 static int
3870 igb_setup_receive_structures(struct adapter *adapter)
3871 {
3872         struct rx_ring *rxr = adapter->rx_rings;
3873         int i;
3874
3875         for (i = 0; i < adapter->num_queues; i++, rxr++)
3876                 if (igb_setup_receive_ring(rxr))
3877                         goto fail;
3878
3879         return (0);
3880 fail:
3881         /*
3882          * Free the receive rings set up so far; only the rings
3883          * that completed need handling here, as the failing ring
3884          * has already cleaned up after itself. 'i' is the index
3885          * of the failed ring, so pre-decrement it.
3886          */
3887         rxr = adapter->rx_rings;
3888         for (--i; i >= 0; i--, rxr++)
3889                 igb_free_receive_ring(rxr);
3892
3893         return (ENOBUFS);
3894 }
3895
3896 /*********************************************************************
3897  *
3898  *  Enable receive unit.
3899  *
3900  **********************************************************************/
3901 static void
3902 igb_initialize_receive_units(struct adapter *adapter)
3903 {
3904         struct rx_ring  *rxr = adapter->rx_rings;
3905         struct ifnet    *ifp = adapter->ifp;
3906         struct e1000_hw *hw = &adapter->hw;
3907         u32             rctl, rxcsum, psize, srrctl = 0;
3908
3909         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3910
3911         /*
3912          * Make sure receives are disabled while setting
3913          * up the descriptor ring
3914          */
3915         rctl = E1000_READ_REG(hw, E1000_RCTL);
3916         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3917
3918         /*
3919         ** Set up for header split
3920         */
3921         if (rxr->hdr_split) {
3922                 /* Use a standard mbuf for the header */
3923                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3924                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3925         } else
3926                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3927
3928         /*
3929         ** Set up for jumbo frames
3930         */
3931         if (ifp->if_mtu > ETHERMTU) {
3932                 rctl |= E1000_RCTL_LPE;
3933                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3934                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3935
3936                 /* Set maximum packet len */
3937                 psize = adapter->max_frame_size;
3938                 /* are we on a vlan? */
3939                 if (adapter->ifp->if_vlantrunk != NULL)
3940                         psize += VLAN_TAG_SIZE;
3941                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3942         } else {
3943                 rctl &= ~E1000_RCTL_LPE;
3944                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3945                 rctl |= E1000_RCTL_SZ_2048;
3946         }
3947
3948         /* Setup the Base and Length of the Rx Descriptor Rings */
3949         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3950                 u64 bus_addr = rxr->rxdma.dma_paddr;
3951                 u32 rxdctl;
3952
3953                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3954                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3955                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3956                     (uint32_t)(bus_addr >> 32));
3957                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3958                     (uint32_t)bus_addr);
3959                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3960                 /* Enable this Queue */
3961                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3962                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3963                 rxdctl &= 0xFFF00000;
3964                 rxdctl |= IGB_RX_PTHRESH;
3965                 rxdctl |= IGB_RX_HTHRESH << 8;
3966                 rxdctl |= IGB_RX_WTHRESH << 16;
3967                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3968         }
3969
3970         /*
3971         ** Setup for RX MultiQueue
3972         */
3973         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3974         if (adapter->num_queues > 1) {
3975                 u32 random[10], mrqc, shift = 0;
3976                 union igb_reta {
3977                         u32 dword;
3978                         u8  bytes[4];
3979                 } reta;
3980
3981                 arc4rand(&random, sizeof(random), 0);
3982                 if (adapter->hw.mac.type == e1000_82575)
3983                         shift = 6;
3984                 /* Populate the redirection table, 4 entries per register */
3985                 for (int i = 0; i < 128; i++) {
3986                         reta.bytes[i & 3] =
3987                             (i % adapter->num_queues) << shift;
3988                         if ((i & 3) == 3)
3989                                 E1000_WRITE_REG(hw,
3990                                     E1000_RETA(i >> 2), reta.dword);
3991                 }
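                /*
                ** Illustrative expansion of the loop above (on a
                ** little-endian host) with num_queues = 2 and
                ** shift = 0: reta.bytes cycles 0,1,0,1, so every
                ** RETA register is written as 0x01000100, spreading
                ** the 128 hash buckets evenly over both queues.
                */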
3992                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3993                 /* Now fill in the RSS random key */
3994                 for (int i = 0; i < 10; i++)
3995                         E1000_WRITE_REG_ARRAY(hw,
3996                             E1000_RSSRK(0), i, random[i]);
3997
3998                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3999                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4000                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4001                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4002                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4003                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4004                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4005                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4006
4007                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4008
4009                 /*
4010                 ** NOTE: Receive full-packet checksum offload
4011                 ** is mutually exclusive with multiqueue; this
4012                 ** is distinct from the TCP/IP header checksums,
4013                 ** which still work.
4014                 */
4015                 rxcsum |= E1000_RXCSUM_PCSD;
4016 #if __FreeBSD_version >= 800000
4017                 /* For SCTP Offload */
4018                 if ((hw->mac.type == e1000_82576)
4019                     && (ifp->if_capenable & IFCAP_RXCSUM))
4020                         rxcsum |= E1000_RXCSUM_CRCOFL;
4021 #endif
4022         } else {
4023                 /* Non RSS setup */
4024                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4025                         rxcsum |= E1000_RXCSUM_IPPCSE;
4026 #if __FreeBSD_version >= 800000
4027                         if (adapter->hw.mac.type == e1000_82576)
4028                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4029 #endif
4030                 } else
4031                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4032         }
4033         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4034
4035         /* Setup the Receive Control Register */
4036         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4037         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4038                    E1000_RCTL_RDMTS_HALF |
4039                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4040         /* Strip CRC bytes. */
4041         rctl |= E1000_RCTL_SECRC;
4042         /* Make sure VLAN Filters are off */
4043         rctl &= ~E1000_RCTL_VFE;
4044         /* Don't store bad packets */
4045         rctl &= ~E1000_RCTL_SBP;
4046
4047         /* Enable Receives */
4048         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4049
4050         /*
4051          * Setup the HW Rx Head and Tail Descriptor Pointers
4052          *   - needs to be after enable
4053          */
4054         for (int i = 0; i < adapter->num_queues; i++) {
4055                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4056                 E1000_WRITE_REG(hw, E1000_RDT(i),
4057                      adapter->num_rx_desc - 1);
4058         }
4059         return;
4060 }
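/*
** Worked example for the SRRCTL sizing above (illustrative, assuming
** the customary E1000_SRRCTL_BSIZEPKT_SHIFT of 10): the standard-MTU
** path encodes its 2KB packet buffer in 1KB units as 2048 >> 10 = 2,
** and the jumbo path encodes its 4KB buffer as 4 the same way.
*/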
4061
4062 /*********************************************************************
4063  *
4064  *  Free receive rings.
4065  *
4066  **********************************************************************/
4067 static void
4068 igb_free_receive_structures(struct adapter *adapter)
4069 {
4070         struct rx_ring *rxr = adapter->rx_rings;
4071
4072         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4073                 struct lro_ctrl *lro = &rxr->lro;
4074                 igb_free_receive_buffers(rxr);
4075                 tcp_lro_free(lro);
4076                 igb_dma_free(adapter, &rxr->rxdma);
4077         }
4078
4079         free(adapter->rx_rings, M_DEVBUF);
4080 }
4081
4082 /*********************************************************************
4083  *
4084  *  Free receive ring data structures.
4085  *
4086  **********************************************************************/
4087 static void
4088 igb_free_receive_buffers(struct rx_ring *rxr)
4089 {
4090         struct adapter          *adapter = rxr->adapter;
4091         struct igb_rx_buf       *rxbuf;
4092         int i;
4093
4094         INIT_DEBUGOUT("free_receive_buffers: begin");
4095
4096         /* Cleanup any existing buffers */
4097         if (rxr->rx_buffers != NULL) {
4098                 for (i = 0; i < adapter->num_rx_desc; i++) {
4099                         rxbuf = &rxr->rx_buffers[i];
4100                         if (rxbuf->m_head != NULL) {
4101                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4102                                     BUS_DMASYNC_POSTREAD);
4103                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4104                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4105                                 m_freem(rxbuf->m_head);
4106                         }
4107                         if (rxbuf->m_pack != NULL) {
4108                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4109                                     BUS_DMASYNC_POSTREAD);
4110                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4111                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4112                                 m_freem(rxbuf->m_pack);
4113                         }
4114                         rxbuf->m_head = NULL;
4115                         rxbuf->m_pack = NULL;
4116                         if (rxbuf->hmap != NULL) {
4117                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4118                                 rxbuf->hmap = NULL;
4119                         }
4120                         if (rxbuf->pmap != NULL) {
4121                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4122                                 rxbuf->pmap = NULL;
4123                         }
4124                 }
4125                 if (rxr->rx_buffers != NULL) {
4126                         free(rxr->rx_buffers, M_DEVBUF);
4127                         rxr->rx_buffers = NULL;
4128                 }
4129         }
4130
4131         if (rxr->htag != NULL) {
4132                 bus_dma_tag_destroy(rxr->htag);
4133                 rxr->htag = NULL;
4134         }
4135         if (rxr->ptag != NULL) {
4136                 bus_dma_tag_destroy(rxr->ptag);
4137                 rxr->ptag = NULL;
4138         }
4139 }
4140
4141 static __inline void
4142 igb_rx_discard(struct rx_ring *rxr, int i)
4143 {
4144         struct adapter          *adapter = rxr->adapter;
4145         struct igb_rx_buf       *rbuf;
4146         struct mbuf             *mh, *mp;
4147
4148         rbuf = &rxr->rx_buffers[i];
4149         if (rxr->fmp != NULL) {
4150                 rxr->fmp->m_flags |= M_PKTHDR;
4151                 m_freem(rxr->fmp);
4152                 rxr->fmp = NULL;
4153                 rxr->lmp = NULL;
4154         }
4155
4156         mh = rbuf->m_head;
4157         mp = rbuf->m_pack;
4158
4159         /* Reuse loaded DMA map and just update mbuf chain */
4160         mh->m_len = MHLEN;
4161         mh->m_flags |= M_PKTHDR;
4162         mh->m_next = NULL;
4163
4164         mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4165         mp->m_data = mp->m_ext.ext_buf;
4166         mp->m_next = NULL;
4167         return;
4168 }
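/*
** Note on the fast path above: the DMA maps stay loaded, so a
** discard only restores m_len/m_data to present the same buffers to
** the hardware again; no unload/reload and no allocation, which is
** what keeps discarding cheap when a burst of bad frames arrives.
*/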
4169
4170 static __inline void
4171 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4172 {
4173
4174         /*
4175          * At the moment LRO is only done for IPv4/TCP packets whose
4176          * TCP checksum has been verified by the hardware, and which
4177          * carry no VLAN tag in the ethernet header.
4178          */
4179         if (rxr->lro_enabled &&
4180             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4181             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4182             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4183             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4184             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4185             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4186                 /*
4187                  * Send to the stack if:
4188                  *  - LRO not enabled, or
4189                  *  - no LRO resources, or
4190                  *  - LRO enqueue fails
4191                  */
4192                 if (rxr->lro.lro_cnt != 0)
4193                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4194                                 return;
4195         }
4196         (*ifp->if_input)(ifp, m);
4197 }
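
/*
 * Illustrative walk-through of the gating above: for an untagged (or
 * hardware-stripped) IPv4/TCP frame the adapter sets both
 * E1000_RXDADV_PKTTYPE_IPV4 and E1000_RXDADV_PKTTYPE_TCP in ptype,
 * and igb_rx_checksum() will have marked the mbuf with
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR, so tcp_lro_rx() is attempted;
 * anything else (IPv6, UDP, an ETQF filter match, or an unverified
 * checksum) goes straight to if_input().
 */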
4198
4199 /*********************************************************************
4200  *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and passes data that has
 *  been DMA'd into host memory up to the upper layer.
4204  *
4205  *  We loop at most count times if count is > 0, or until done if
4206  *  count < 0.
4207  *
4208  *  Return TRUE if more to clean, FALSE otherwise
4209  *********************************************************************/
4210 static bool
4211 igb_rxeof(struct igb_queue *que, int count)
4212 {
4213         struct adapter          *adapter = que->adapter;
4214         struct rx_ring          *rxr = que->rxr;
4215         struct ifnet            *ifp = adapter->ifp;
4216         struct lro_ctrl         *lro = &rxr->lro;
4217         struct lro_entry        *queued;
4218         int                     i, processed = 0;
4219         u32                     ptype, staterr = 0;
4220         union e1000_adv_rx_desc *cur;
4221
4222         IGB_RX_LOCK(rxr);
4223         /* Sync the ring. */
4224         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4225             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4226
4227         /* Main clean loop */
4228         for (i = rxr->next_to_check; count != 0;) {
4229                 struct mbuf             *sendmp, *mh, *mp;
4230                 struct igb_rx_buf       *rxbuf;
4231                 u16                     hlen, plen, hdr, vtag;
4232                 bool                    eop = FALSE;
4233  
4234                 cur = &rxr->rx_base[i];
4235                 staterr = le32toh(cur->wb.upper.status_error);
4236                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4237                         break;
4238                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4239                         break;
4240                 count--;
4241                 sendmp = mh = mp = NULL;
4242                 cur->wb.upper.status_error = 0;
4243                 rxbuf = &rxr->rx_buffers[i];
4244                 plen = le16toh(cur->wb.upper.length);
4245                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4246                 vtag = le16toh(cur->wb.upper.vlan);
4247                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4248                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4249
4250                 /* Make sure all segments of a bad packet are discarded */
4251                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4252                     (rxr->discard)) {
4253                         ifp->if_ierrors++;
4254                         ++rxr->rx_discarded;
4255                         if (!eop) /* Catch subsequent segs */
4256                                 rxr->discard = TRUE;
4257                         else
4258                                 rxr->discard = FALSE;
4259                         igb_rx_discard(rxr, i);
4260                         goto next_desc;
4261                 }
4262
4263                 /*
                ** The way the hardware is configured to
                ** split, it will ONLY use the header buffer
                ** when header split is enabled; otherwise we
                ** get the normal behavior, i.e., both header
                ** and payload are DMA'd into the payload buffer.
4269                 **
4270                 ** The fmp test is to catch the case where a
4271                 ** packet spans multiple descriptors, in that
4272                 ** case only the first header is valid.
4273                 */
4274                 if (rxr->hdr_split && rxr->fmp == NULL) {
4275                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4276                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4277                         if (hlen > IGB_HDR_BUF)
4278                                 hlen = IGB_HDR_BUF;
4279                         /* Handle the header mbuf */
4280                         mh = rxr->rx_buffers[i].m_head;
4281                         mh->m_len = hlen;
4282                         /* clear buf info for refresh */
4283                         rxbuf->m_head = NULL;
4284                         /*
                        ** Get the payload length; this
                        ** could be zero if it's a small
4287                         ** packet.
4288                         */
4289                         if (plen > 0) {
4290                                 mp = rxr->rx_buffers[i].m_pack;
4291                                 mp->m_len = plen;
4292                                 mh->m_next = mp;
4293                                 /* clear buf info for refresh */
4294                                 rxbuf->m_pack = NULL;
4295                                 rxr->rx_split_packets++;
4296                         }
4297                 } else {
4298                         /*
4299                         ** Either no header split, or a
4300                         ** secondary piece of a fragmented
4301                         ** split packet.
4302                         */
4303                         mh = rxr->rx_buffers[i].m_pack;
4304                         mh->m_len = plen;
4305                         /* clear buf info for refresh */
4306                         rxbuf->m_pack = NULL;
4307                 }
4308
4309                 ++processed; /* So we know when to refresh */
4310
4311                 /* Initial frame - setup */
4312                 if (rxr->fmp == NULL) {
4313                         mh->m_pkthdr.len = mh->m_len;
4314                         /* Store the first mbuf */
4315                         rxr->fmp = mh;
4316                         rxr->lmp = mh;
4317                         if (mp != NULL) {
4318                                 /* Add payload if split */
4319                                 mh->m_pkthdr.len += mp->m_len;
4320                                 rxr->lmp = mh->m_next;
4321                         }
4322                 } else {
                        /* Chain mbufs together */
4324                         rxr->lmp->m_next = mh;
4325                         rxr->lmp = rxr->lmp->m_next;
4326                         rxr->fmp->m_pkthdr.len += mh->m_len;
4327                 }
4328
4329                 if (eop) {
4330                         rxr->fmp->m_pkthdr.rcvif = ifp;
4331                         ifp->if_ipackets++;
4332                         rxr->rx_packets++;
4333                         /* capture data for AIM */
4334                         rxr->packets++;
4335                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4336                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4337
4338                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4339                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4340
4341                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4342                             (staterr & E1000_RXD_STAT_VP) != 0) {
4343                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4344                                 rxr->fmp->m_flags |= M_VLANTAG;
4345                         }
4346 #if __FreeBSD_version >= 800000
4347                         rxr->fmp->m_pkthdr.flowid = que->msix;
4348                         rxr->fmp->m_flags |= M_FLOWID;
4349 #endif
4350                         sendmp = rxr->fmp;
4351                         /* Make sure to set M_PKTHDR. */
4352                         sendmp->m_flags |= M_PKTHDR;
4353                         rxr->fmp = NULL;
4354                         rxr->lmp = NULL;
4355                 }
4356
4357 next_desc:
4358                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4359                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4360
4361                 /* Advance our pointers to the next descriptor. */
4362                 if (++i == adapter->num_rx_desc)
4363                         i = 0;
4364                 /*
4365                 ** Send to the stack or LRO
4366                 */
4367                 if (sendmp != NULL)
4368                         igb_rx_input(rxr, ifp, sendmp, ptype);
4369
4370                 /* Every 8 descriptors we go to refresh mbufs */
4371                 if (processed == 8) {
4372                         igb_refresh_mbufs(rxr, i);
4373                         processed = 0;
4374                 }
4375         }
4376
4377         /* Catch any remainders */
4378         if (processed != 0) {
4379                 igb_refresh_mbufs(rxr, i);
4380                 processed = 0;
4381         }
4382
4383         rxr->next_to_check = i;
4384
4385         /*
4386          * Flush any outstanding LRO work
4387          */
4388         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4389                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4390                 tcp_lro_flush(lro, queued);
4391         }
4392
4393         IGB_RX_UNLOCK(rxr);
4394
4395         /*
        ** Is there still cleaning to do?
        ** If so, return TRUE so another pass gets scheduled.
4398         */
4399         if ((staterr & E1000_RXD_STAT_DD) != 0)
4400                 return (TRUE);
4401
4402         return (FALSE);
4403 }
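
/*
 * Note on the refresh cadence above: mbufs are replenished in batches
 * of eight (plus once more for any remainder) so the ring's tail
 * register is bumped in chunks rather than per descriptor, trading a
 * little latency for fewer register writes.
 */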
4404
4405 /*********************************************************************
4406  *
4407  *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack of the checksum status so that it does
 *  not spend time re-verifying the checksum.
4410  *
4411  *********************************************************************/
4412 static void
4413 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4414 {
4415         u16 status = (u16)staterr;
4416         u8  errors = (u8) (staterr >> 24);
4417         int sctp;
4418
4419         /* Ignore Checksum bit is set */
4420         if (status & E1000_RXD_STAT_IXSM) {
4421                 mp->m_pkthdr.csum_flags = 0;
4422                 return;
4423         }
4424
4425         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4426             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4427                 sctp = 1;
4428         else
4429                 sctp = 0;
4430         if (status & E1000_RXD_STAT_IPCS) {
4431                 /* Did it pass? */
4432                 if (!(errors & E1000_RXD_ERR_IPE)) {
4433                         /* IP Checksum Good */
4434                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4435                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4436                 } else
4437                         mp->m_pkthdr.csum_flags = 0;
4438         }
4439
4440         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4441                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4442 #if __FreeBSD_version >= 800000
4443                 if (sctp) /* reassign */
4444                         type = CSUM_SCTP_VALID;
4445 #endif
4446                 /* Did it pass? */
4447                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4448                         mp->m_pkthdr.csum_flags |= type;
4449                         if (sctp == 0)
4450                                 mp->m_pkthdr.csum_data = htons(0xffff);
4451                 }
4452         }
4453         return;
4454 }
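
/*
 * Layout behind the casts above: the writeback staterr word carries
 * the status bits in its low 16 bits and the error bits in bits
 * 31:24.  For example, a good TCP/IPv4 packet arrives with
 * E1000_RXD_STAT_IPCS and E1000_RXD_STAT_TCPCS set and no IPE/TCPE
 * error bits, so the mbuf leaves here flagged
 * CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR
 * with csum_data = 0xffff.
 */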
4455
4456 /*
 * This routine is run via a vlan
4458  * config EVENT
4459  */
4460 static void
4461 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4462 {
4463         struct adapter  *adapter = ifp->if_softc;
4464         u32             index, bit;
4465
4466         if (ifp->if_softc !=  arg)   /* Not our event */
4467                 return;
4468
4469         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4470                 return;
4471
4472         index = (vtag >> 5) & 0x7F;
4473         bit = vtag & 0x1F;
4474         igb_shadow_vfta[index] |= (1 << bit);
4475         ++adapter->num_vlans;
4476         /* Re-init to load the changes */
4477         igb_init(adapter);
4478 }
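
/*
 * Worked example of the VFTA math above: the filter table is 128
 * 32-bit words covering VLAN IDs 0-4095.  For vtag = 1000,
 * index = (1000 >> 5) & 0x7F = 31 and bit = 1000 & 0x1F = 8, so
 * VLAN 1000 lives in bit 8 of word 31.  Note that igb_shadow_vfta
 * is a file-scope array, so it is shared by all igb instances.
 */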
4479
4480 /*
 * This routine is run via a vlan
4482  * unconfig EVENT
4483  */
4484 static void
4485 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4486 {
4487         struct adapter  *adapter = ifp->if_softc;
4488         u32             index, bit;
4489
4490         if (ifp->if_softc !=  arg)
4491                 return;
4492
4493         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4494                 return;
4495
4496         index = (vtag >> 5) & 0x7F;
4497         bit = vtag & 0x1F;
4498         igb_shadow_vfta[index] &= ~(1 << bit);
4499         --adapter->num_vlans;
4500         /* Re-init to load the changes */
4501         igb_init(adapter);
4502 }
4503
4504 static void
4505 igb_setup_vlan_hw_support(struct adapter *adapter)
4506 {
4507         struct e1000_hw *hw = &adapter->hw;
4508         u32             reg;
4509
4510         /*
        ** We get here through init_locked, meaning
        ** a soft reset, which has already cleared
        ** the VFTA and other state; if no VLANs
        ** have been registered, do nothing.
4515         */
4516         if (adapter->num_vlans == 0)
4517                 return;
4518
4519         /*
        ** A soft reset zeroes out the VFTA, so
4521         ** we need to repopulate it now.
4522         */
4523         for (int i = 0; i < IGB_VFTA_SIZE; i++)
4524                 if (igb_shadow_vfta[i] != 0)
4525                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4526                             i, igb_shadow_vfta[i]);
4527
4528         reg = E1000_READ_REG(hw, E1000_CTRL);
4529         reg |= E1000_CTRL_VME;
4530         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4531
4532         /* Enable the Filter Table */
4533         reg = E1000_READ_REG(hw, E1000_RCTL);
4534         reg &= ~E1000_RCTL_CFIEN;
4535         reg |= E1000_RCTL_VFE;
4536         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4537
4538         /* Update the frame size */
4539         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4540             adapter->max_frame_size + VLAN_TAG_SIZE);
4541 }
4542
4543 static void
4544 igb_enable_intr(struct adapter *adapter)
4545 {
        /* With MSIX, set up what the hardware should auto-clear and auto-mask */
4547         if (adapter->msix_mem) {
4548                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4549                     adapter->eims_mask);
4550                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4551                     adapter->eims_mask);
4552                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4553                     adapter->eims_mask);
4554                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4555                     E1000_IMS_LSC);
4556         } else {
4557                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4558                     IMS_ENABLE_MASK);
4559         }
4560         E1000_WRITE_FLUSH(&adapter->hw);
4561
4562         return;
4563 }
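
/*
 * Register roles in the MSIX branch above, per the 82575/82576
 * datasheets: EIAC selects which extended causes auto-clear on
 * interrupt assertion, EIAM auto-masks them while they are being
 * serviced, and EIMS unmasks them; link-status-change still arrives
 * through the legacy IMS register.
 */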
4564
4565 static void
4566 igb_disable_intr(struct adapter *adapter)
4567 {
4568         if (adapter->msix_mem) {
4569                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4570                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4571         } 
4572         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4573         E1000_WRITE_FLUSH(&adapter->hw);
4574         return;
4575 }
4576
4577 /*
 * Bit of a misnomer: what this really means is
 * to enable OS management of the system, i.e.,
 * to disable the special hardware management features.
4581  */
4582 static void
4583 igb_init_manageability(struct adapter *adapter)
4584 {
4585         if (adapter->has_manage) {
4586                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4587                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4588
4589                 /* disable hardware interception of ARP */
4590                 manc &= ~(E1000_MANC_ARP_EN);
4591
4592                 /* enable receiving management packets to the host */
4593                 manc |= E1000_MANC_EN_MNG2HOST;
4594                 manc2h |= 1 << 5;  /* Mng Port 623 */
4595                 manc2h |= 1 << 6;  /* Mng Port 664 */
4596                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4597                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4598         }
4599 }
4600
4601 /*
4602  * Give control back to hardware management
4603  * controller if there is one.
4604  */
4605 static void
4606 igb_release_manageability(struct adapter *adapter)
4607 {
4608         if (adapter->has_manage) {
4609                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4610
4611                 /* re-enable hardware interception of ARP */
4612                 manc |= E1000_MANC_ARP_EN;
4613                 manc &= ~E1000_MANC_EN_MNG2HOST;
4614
4615                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4616         }
4617 }
4618
4619 /*
4620  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4621  * For ASF and Pass Through versions of f/w this means that
4622  * the driver is loaded. 
4623  *
4624  */
4625 static void
4626 igb_get_hw_control(struct adapter *adapter)
4627 {
4628         u32 ctrl_ext;
4629
4630         /* Let firmware know the driver has taken over */
4631         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4632         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4633             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4634 }
4635
4636 /*
4637  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4638  * For ASF and Pass Through versions of f/w this means that the
4639  * driver is no longer loaded.
4640  *
4641  */
4642 static void
4643 igb_release_hw_control(struct adapter *adapter)
4644 {
4645         u32 ctrl_ext;
4646
        /* Let firmware take over control of h/w */
4648         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4649         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4650             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4651 }
4652
4653 static int
4654 igb_is_valid_ether_addr(uint8_t *addr)
4655 {
4656         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4657
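        /*
         * Reject group/multicast addresses (I/G bit, bit 0 of the
         * first octet, set; e.g. 01:00:5e:00:00:01) as well as the
         * all-zero address.
         */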
4658         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4659                 return (FALSE);
4660         }
4661
4662         return (TRUE);
4663 }
4664
4665
4666 /*
4667  * Enable PCI Wake On Lan capability
4668  */
4669 static void
4670 igb_enable_wakeup(device_t dev)
4671 {
4672         u16     cap, status;
4673         u8      id;
4674
        /* First find the capabilities pointer */
        cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
        /*
         * Read the capability ID; note this assumes the power
         * management capability is the first entry in the list.
         */
        id = pci_read_config(dev, cap, 1);
        if (id != PCIY_PMG)     /* Something wrong */
                return;
4681         /* OK, we have the power capabilities, so
4682            now get the status register */
4683         cap += PCIR_POWER_STATUS;
4684         status = pci_read_config(dev, cap, 2);
4685         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4686         pci_write_config(dev, cap, status, 2);
4687         return;
4688 }
4689
4690 static void
4691 igb_led_func(void *arg, int onoff)
4692 {
4693         struct adapter  *adapter = arg;
4694
4695         IGB_CORE_LOCK(adapter);
4696         if (onoff) {
4697                 e1000_setup_led(&adapter->hw);
4698                 e1000_led_on(&adapter->hw);
4699         } else {
4700                 e1000_led_off(&adapter->hw);
4701                 e1000_cleanup_led(&adapter->hw);
4702         }
4703         IGB_CORE_UNLOCK(adapter);
4704 }
4705
4706 /**********************************************************************
4707  *
4708  *  Update the board statistics counters.
4709  *
4710  **********************************************************************/
4711 static void
4712 igb_update_stats_counters(struct adapter *adapter)
4713 {
4714         struct ifnet   *ifp;
4715
4716         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4717            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4718                 adapter->stats.symerrs +=
4719                     E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4720                 adapter->stats.sec +=
4721                     E1000_READ_REG(&adapter->hw, E1000_SEC);
4722         }
4723         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4724         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4725         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4726         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4727
4728         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4729         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4730         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4731         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4732         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4733         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4734         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4735         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4736         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4737         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4738         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4739         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4740         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4741         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4742         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4743         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4744         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4745         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4746         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4747         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4748
4749         /* For the 64-bit byte counters the low dword must be read first. */
4750         /* Both registers clear on the read of the high dword */
4751
        adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
            ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
        adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
            ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4754
4755         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4756         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4757         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4758         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4759         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4760
        adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
            ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
        adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
            ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4763
4764         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4765         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4766         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4767         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4768         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4769         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4770         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4771         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4772         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4773         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4774
4775         adapter->stats.algnerrc += 
4776                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4777         adapter->stats.rxerrc += 
4778                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4779         adapter->stats.tncrs += 
4780                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4781         adapter->stats.cexterr += 
4782                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4783         adapter->stats.tsctc += 
4784                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4785         adapter->stats.tsctfc += 
4786                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4787         ifp = adapter->ifp;
4788
4789         ifp->if_collisions = adapter->stats.colc;
4790
4791         /* Rx Errors */
4792         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4793             adapter->stats.crcerrs + adapter->stats.algnerrc +
4794             adapter->stats.ruc + adapter->stats.roc +
4795             adapter->stats.mpc + adapter->stats.cexterr;
4796
4797         /* Tx Errors */
4798         ifp->if_oerrors = adapter->stats.ecol +
4799             adapter->stats.latecol + adapter->watchdog_events;
4800 }
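
/*
 * Note: the statistics registers read above are clear-on-read, which
 * is why every counter is accumulated with += into the softc rather
 * than latched; a missed poll interval therefore loses no counts.
 */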
4801
4802
4803 /**********************************************************************
4804  *
4805  *  This routine is called only when igb_display_debug_stats is enabled.
4806  *  This routine provides a way to take a look at important statistics
4807  *  maintained by the driver and hardware.
4808  *
4809  **********************************************************************/
4810 static void
4811 igb_print_debug_info(struct adapter *adapter)
4812 {
4813         device_t dev = adapter->dev;
4814         struct igb_queue *que = adapter->queues;
4815         struct rx_ring *rxr = adapter->rx_rings;
4816         struct tx_ring *txr = adapter->tx_rings;
4817         uint8_t *hw_addr = adapter->hw.hw_addr;
4818
4819         device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4820         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4821             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4822             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4823
#if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4825         device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4826             E1000_READ_REG(&adapter->hw, E1000_IMS),
4827             E1000_READ_REG(&adapter->hw, E1000_EIMS));
4828 #endif
4829
        device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
            ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
            (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4833         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4834             adapter->hw.fc.high_water,
4835             adapter->hw.fc.low_water);
4836
4837         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4838                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4839                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4840                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4841                 device_printf(dev, "rdh = %d, rdt = %d\n",
4842                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4843                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4844                 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4845                     txr->me, (long long)txr->no_desc_avail);
4846                 device_printf(dev, "TX(%d) Packets sent = %lld\n",
4847                     txr->me, (long long)txr->tx_packets);
4848                 device_printf(dev, "RX(%d) Packets received = %lld  ",
4849                     rxr->me, (long long)rxr->rx_packets);
4850         }
4851
        rxr = adapter->rx_rings; /* reset; the loop above advanced it */
        for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4853                 struct lro_ctrl *lro = &rxr->lro;
4854                 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4855                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4856                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4857                 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4858                     (long long)rxr->rx_packets);
4859                 device_printf(dev, " Split Packets = %lld ",
4860                     (long long)rxr->rx_split_packets);
4861                 device_printf(dev, " Byte count = %lld\n",
4862                     (long long)rxr->rx_bytes);
                device_printf(dev, "RX(%d) LRO Queued = %d  ",
                    i, lro->lro_queued);
                device_printf(dev, "LRO Flushed = %d\n", lro->lro_flushed);
4866         }
4867
4868         for (int i = 0; i < adapter->num_queues; i++, que++)
                device_printf(dev, "QUE(%d) IRQs = %lld\n",
                    i, (long long)que->irqs);
4871
4872         device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4873         device_printf(dev, "Mbuf defrag failed = %ld\n",
4874             adapter->mbuf_defrag_failed);
4875         device_printf(dev, "Std mbuf header failed = %ld\n",
4876             adapter->mbuf_header_failed);
4877         device_printf(dev, "Std mbuf packet failed = %ld\n",
4878             adapter->mbuf_packet_failed);
4879         device_printf(dev, "Driver dropped packets = %ld\n",
4880             adapter->dropped_pkts);
4881         device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4882                 adapter->no_tx_dma_setup);
4883 }
4884
4885 static void
4886 igb_print_hw_stats(struct adapter *adapter)
4887 {
4888         device_t dev = adapter->dev;
4889
4890         device_printf(dev, "Excessive collisions = %lld\n",
4891             (long long)adapter->stats.ecol);
#if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4893         device_printf(dev, "Symbol errors = %lld\n",
4894             (long long)adapter->stats.symerrs);
4895 #endif
4896         device_printf(dev, "Sequence errors = %lld\n",
4897             (long long)adapter->stats.sec);
4898         device_printf(dev, "Defer count = %lld\n",
4899             (long long)adapter->stats.dc);
4900         device_printf(dev, "Missed Packets = %lld\n",
4901             (long long)adapter->stats.mpc);
4902         device_printf(dev, "Receive No Buffers = %lld\n",
4903             (long long)adapter->stats.rnbc);
4904         /* RLEC is inaccurate on some hardware, calculate our own. */
4905         device_printf(dev, "Receive Length Errors = %lld\n",
4906             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4907         device_printf(dev, "Receive errors = %lld\n",
4908             (long long)adapter->stats.rxerrc);
4909         device_printf(dev, "Crc errors = %lld\n",
4910             (long long)adapter->stats.crcerrs);
4911         device_printf(dev, "Alignment errors = %lld\n",
4912             (long long)adapter->stats.algnerrc);
4913         /* On 82575 these are collision counts */
4914         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4915             (long long)adapter->stats.cexterr);
4916         device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4917         device_printf(dev, "watchdog timeouts = %ld\n",
4918             adapter->watchdog_events);
4919         device_printf(dev, "XON Rcvd = %lld\n",
4920             (long long)adapter->stats.xonrxc);
4921         device_printf(dev, "XON Xmtd = %lld\n",
4922             (long long)adapter->stats.xontxc);
4923         device_printf(dev, "XOFF Rcvd = %lld\n",
4924             (long long)adapter->stats.xoffrxc);
4925         device_printf(dev, "XOFF Xmtd = %lld\n",
4926             (long long)adapter->stats.xofftxc);
4927         device_printf(dev, "Good Packets Rcvd = %lld\n",
4928             (long long)adapter->stats.gprc);
4929         device_printf(dev, "Good Packets Xmtd = %lld\n",
4930             (long long)adapter->stats.gptc);
4931         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4932             (long long)adapter->stats.tsctc);
4933         device_printf(dev, "TSO Contexts Failed = %lld\n",
4934             (long long)adapter->stats.tsctfc);
4935 }
4936
4937 /**********************************************************************
4938  *
4939  *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. Only the first 32 words are
 *  dumped; the content that matters lies within that extent.
4942  *
4943  **********************************************************************/
4944 static void
4945 igb_print_nvm_info(struct adapter *adapter)
4946 {
4947         u16     eeprom_data;
4948         int     i, j, row = 0;
4949
        /* It's a bit crude, but it gets the job done */
4951         printf("\nInterface EEPROM Dump:\n");
4952         printf("Offset\n0x0000  ");
4953         for (i = 0, j = 0; i < 32; i++, j++) {
4954                 if (j == 8) { /* Make the offset block */
4955                         j = 0; ++row;
                        printf("\n0x00%x0  ", row);
4957                 }
4958                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4959                 printf("%04x ", eeprom_data);
4960         }
4961         printf("\n");
4962 }
4963
4964 static int
4965 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4966 {
4967         struct adapter *adapter;
4968         int error;
4969         int result;
4970
4971         result = -1;
4972         error = sysctl_handle_int(oidp, &result, 0, req);
4973
4974         if (error || !req->newptr)
4975                 return (error);
4976
4977         if (result == 1) {
4978                 adapter = (struct adapter *)arg1;
4979                 igb_print_debug_info(adapter);
4980         }
4981         /*
4982          * This value will cause a hex dump of the
4983          * first 32 16-bit words of the EEPROM to
4984          * the screen.
4985          */
4986         if (result == 2) {
4987                 adapter = (struct adapter *)arg1;
4988                 igb_print_nvm_info(adapter);
4989         }
4990
4991         return (error);
4992 }
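
/*
 * Sketch of how this handler is driven from userland, assuming it is
 * attached as a "debug_info" node under the device's sysctl tree (the
 * OID name comes from the attach code and may differ):
 *
 *      sysctl dev.igb.0.debug_info=1   # dump driver/queue debug state
 *      sysctl dev.igb.0.debug_info=2   # hex dump the first 32 EEPROM words
 */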
4993
4994
4995 static int
4996 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4997 {
4998         struct adapter *adapter;
4999         int error;
5000         int result;
5001
5002         result = -1;
5003         error = sysctl_handle_int(oidp, &result, 0, req);
5004
5005         if (error || !req->newptr)
5006                 return (error);
5007
5008         if (result == 1) {
5009                 adapter = (struct adapter *)arg1;
5010                 igb_print_hw_stats(adapter);
5011         }
5012
5013         return (error);
5014 }
5015
5016 static void
5017 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5018         const char *description, int *limit, int value)
5019 {
5020         *limit = value;
5021         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5022             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5023             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5024 }
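
/*
 * Illustrative attach-time call; the tunable name, description, and
 * default value below are assumptions for the example:
 *
 *      igb_add_rx_process_limit(adapter, "rx_processing_limit",
 *          "max number of rx packets to process",
 *          &adapter->rx_process_limit, 100);
 *
 * This stores the default in *limit and then exposes the variable as
 * a read/write integer sysctl under the device's tree.
 */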