/******************************************************************************

  Copyright (c) 2001-2009, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#ifdef IGB_IEEE1588
#include <sys/ieee1588.h>
#endif

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.7.3";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     igb_qflush(struct ifnet *);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_watchdog(struct adapter *);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static int      igb_hardware_init(struct adapter *);
static void     igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);
static bool     igb_rxeof(struct rx_ring *, int);
static void     igb_rx_checksum(u32, struct mbuf *, bool);
static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_print_hw_stats(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static int      igb_get_buf(struct rx_ring *, int, u8);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void     igb_print_debug_info(struct adapter *);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static int      igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);

static int      igb_irq_fast(void *);
static void     igb_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);
static void     igb_handle_rxtx(void *context, int pending);
static void     igb_handle_tx(void *context, int pending);
static void     igb_handle_rx(void *context, int pending);

/* These are MSIX only irq handlers */
static void     igb_msix_rx(void *);
static void     igb_msix_tx(void *);
static void     igb_msix_link(void *);

/* Adaptive Interrupt Moderation */
static void     igb_update_aim(struct rx_ring *);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        {0, 0}
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
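
/*
 * Usage note (illustrative, not part of the original source): because the
 * driver registers as "igb" via DRIVER_MODULE() above, the module built
 * from this file can typically be loaded at runtime with "kldload if_igb",
 * or at boot by adding
 *
 *     if_igb_load="YES"
 *
 * to /boot/loader.conf.
 */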

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
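
/*
 * Example (a sketch, not part of the original source): because the
 * descriptor counts are exported with TUNABLE_INT(), they can be
 * overridden from /boot/loader.conf before the driver attaches, e.g.:
 *
 *     hw.igb.rxd="2048"
 *     hw.igb.txd="2048"
 *
 * Values must pass the IGB_MIN/IGB_MAX and alignment checks in
 * igb_attach(), or the driver falls back to IGB_DEFAULT_RXD/TXD with
 * a console warning.
 */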

/*
** These parameters are used in Adaptive
** Interrupt Moderation. The value is set
** into EITR and controls the interrupt
** frequency. A static scheme can be
** created by changing the assigned value
** of igb_ave_latency to the desired value,
** and then setting igb_enable_aim to FALSE.
** This results in all EITR registers
** being set to that value statically.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
static int igb_low_latency = IGB_LOW_LATENCY;
TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
static int igb_ave_latency = IGB_AVE_LATENCY;
TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
static int igb_bulk_latency = IGB_BULK_LATENCY;
TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);
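
/*
 * Example of the static scheme described above (illustrative only; the
 * value shown is hypothetical, not a recommended setting). Placing
 *
 *     hw.igb.enable_aim="0"
 *     hw.igb.ave_latency="128"
 *
 * in /boot/loader.conf disables igb_update_aim() and pins every EITR
 * register to the igb_ave_latency value.
 */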

/*
** This will autoconfigure based on the number
** of CPUs if set to 0. Only a matched pair of
** TX and RX rings is allowed.
*/
static int igb_num_queues = 1;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
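
/*
 * Example (illustrative): these can likewise be set from
 * /boot/loader.conf, e.g.:
 *
 *     hw.igb.num_queues="0"        # autoconfigure from the CPU count
 *     hw.igb.fc_setting="0"        # e1000_fc_none, no flow control
 *
 * In the shared e1000 code e1000_fc_none is 0; the compiled-in default
 * above is e1000_fc_full.
 */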

/*
** Shadow VFTA table; this is needed because
** the real filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];


/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_stats, "I", "Statistics");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
            &igb_fc_setting, 0, "Flow Control");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
            &igb_enable_aim, 1, "Interrupt Moderation");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
            &igb_low_latency, 1, "Low Latency");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
            &igb_ave_latency, 1, "Average Latency");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
            &igb_bulk_latency, 1, "Bulk Latency");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueue */
        igb_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. They
         * must not exceed the hardware maximum and must be a multiple
         * of IGB_DBA_ALIGN.
         */
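        /*
         * Worked example (assuming the usual 16-byte e1000 descriptor and
         * a 128-byte IGB_DBA_ALIGN): any count that is a multiple of 8
         * descriptors, such as 1024, satisfies the alignment test below,
         * while a count like 1001 fails it and falls back to the default.
         */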
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
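        /*
         * For reference: with the standard 1500-byte ETHERMTU, a 14-byte
         * Ethernet header and a 4-byte FCS, max_frame_size works out to
         * the classic 1518 bytes; ETH_ZLEN (60) plus the FCS gives the
         * 64-byte minimum frame.
         */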

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address afterwards.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again,
                ** if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Now Initialize the hardware */
        if (igb_hardware_init(adapter)) {
                device_printf(dev, "Unable to initialize the hardware\n");
                error = EIO;
                goto err_late;
        }

        /*
        ** Configure Interrupts
        */
        if (adapter->msix > 1) /* MSIX */
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        igb_setup_interface(dev, adapter);

#ifdef IGB_IEEE1588
        /*
        ** Setup the timer: IEEE 1588 support
        */
        adapter->cycles.read = igb_read_clock;
        adapter->cycles.mask = (u64)-1;
        adapter->cycles.mult = 1;
        adapter->cycles.shift = IGB_TSYNC_SHIFT;
        E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
            IGB_TSYNC_CYCLE_TIME * IGB_TSYNC_SHIFT);
        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIML, 0x00000000);
        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIMH, 0xFF800000);

        /* JFV - this is not complete yet */
#endif

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);

        callout_drain(&adapter->timer);

        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING))
                igb_start(ifp);

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_timer = IGB_TX_TIMEOUT;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr;
        int             i = 0, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        txr = &adapter->tx_rings[i];
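        /*
         * Illustrative mapping: with 4 queues, a packet whose
         * m_pkthdr.flowid hashes to 13 lands on tx_rings[1] (13 % 4);
         * packets without M_FLOWID all fall through to queue 0 above.
         */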

        if (IGB_TX_TRYLOCK(txr)) {
                err = igb_mq_start_locked(ifp, txr, m);
                IGB_TX_UNLOCK(txr);
        } else
                err = drbr_enqueue(ifp, txr->br, m);

        return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0;

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        if (m == NULL) /* Called by tasklet */
                goto process;

        /* If nothing queued go right to xmit */
        if (drbr_empty(ifp, txr->br)) {
                if (igb_xmit(txr, &m)) {
                        if (m && (err = drbr_enqueue(ifp, txr->br, m)) != 0)
                                return (err);
                } else {
                        /* Success, update stats */
                        drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
                        /* Send a copy of the frame to the BPF listener */
                        ETHER_BPF_MTAP(ifp, m);
                        /* Set the watchdog */
                        txr->watchdog_timer = IGB_TX_TIMEOUT;
                }

        } else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                return (err);

process:
        if (drbr_empty(ifp, txr->br))
                return (err);

        /* Process the queue */
        while (TRUE) {
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                next = drbr_dequeue(ifp, txr->br);
                if (next == NULL)
                        break;
                if (igb_xmit(txr, &next))
                        break;
                ETHER_BPF_MTAP(ifp, next);
                /* Set the watchdog */
                txr->watchdog_timer = IGB_TX_TIMEOUT;
        }

        if (txr->tx_avail <= IGB_TX_OP_THRESHOLD)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting hardware takes a very long time
                         * and results in link renegotiation, we only
                         * initialize the hardware when it is absolutely
                         * required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                IGB_CORE_LOCK(adapter);
                                igb_init_locked(adapter);
                                IGB_CORE_UNLOCK(adapter);
                        }
                        arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
                        igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

#ifdef IGB_IEEE1588
        /*
        ** IOCTL support for Precision Time (IEEE 1588) Support
        */
        case SIOCSHWTSTAMP:
                error = igb_hwtstamp_ioctl(adapter, ifp);
                break;
#endif

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
igb_watchdog(struct adapter *adapter)
{
        struct tx_ring  *txr = adapter->tx_rings;
        bool            tx_hang = FALSE;

        IGB_CORE_LOCK_ASSERT(adapter);

        /*
        ** The timer is set to 5 every time start() queues a packet.
        ** Then txeof keeps resetting it as long as it cleans at
        ** least one descriptor.
        ** Finally, anytime all descriptors are clean the timer is
        ** set to 0.
        **
        ** With TX Multiqueue we need to check every queue's timer;
        ** if any queue times out we do the reset.
        */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                if (txr->watchdog_timer == 0 ||
                    (--txr->watchdog_timer)) {
                        IGB_TX_UNLOCK(txr);
                        continue;
                } else {
                        tx_hang = TRUE;
                        IGB_TX_UNLOCK(txr);
                        break;
                }
        }
        if (tx_hang == FALSE)
                return;

        /* If we are in this routine because of pause frames, then
         * don't reset the hardware.
         */
        if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
            E1000_STATUS_TXOFF) {
                txr = adapter->tx_rings; /* reset pointer */
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
                        txr->watchdog_timer = IGB_TX_TIMEOUT;
                        IGB_TX_UNLOCK(txr);
                }
                return;
        }

        if (e1000_check_for_link(&adapter->hw) == 0)
                device_printf(adapter->dev, "watchdog timeout -- resetting\n");

        txr = adapter->tx_rings; /* reset pointer before dumping queue state */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
                    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
                    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
                device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
                    " Next Desc to Clean = %d\n", i, txr->tx_avail,
                    txr->next_to_clean);
        }

        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->watchdog_events++;

        igb_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: it is the init entry point in
 *  the network interface structure, and it is also used by the driver
 *  as a hw/sw initialization routine to get to a consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct rx_ring *rxr = adapter->rx_rings;
        struct tx_ring *txr = adapter->tx_rings;
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        u32             pba = 0;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_stop(adapter);

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
         */
        if (adapter->hw.mac.type == e1000_82575) {
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
                E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
        }

        /* Get the latest mac address; user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /* Initialize the hardware */
        if (igb_hardware_init(adapter)) {
                device_printf(dev, "Unable to initialize the hardware\n");
                return;
        }
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (ifp->if_mtu > ETHERMTU)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MCLBYTES;
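        /*
         * For example, a 9000-byte jumbo MTU selects MJUMPAGESIZE
         * (PAGE_SIZE, 4KB on most platforms) clusters, while the
         * standard 1500-byte MTU uses 2KB MCLBYTES clusters; a received
         * jumbo frame may then span several chained clusters.
         */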

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                igb_stop(adapter);
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* Set up VLAN tag offload and filter */
        igb_setup_vlan_hw_support(adapter);

        /* Set default RX interrupt moderation */
        for (int i = 0; i < adapter->num_queues; i++, rxr++) {
                E1000_WRITE_REG(&adapter->hw,
                    E1000_EITR(rxr->msix), igb_ave_latency);
                rxr->eitr_setting = igb_ave_latency;
        }

        /* Set TX interrupt rate & reset TX watchdog */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                E1000_WRITE_REG(&adapter->hw,
                    E1000_EITR(txr->msix), igb_ave_latency);
                txr->watchdog_timer = FALSE;
        }

        /* This clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
        igb_enable_intr(adapter);
        E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);

        /* Don't reset the phy next time init gets called */
        adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_rxtx(void *context, int pending)
{
        struct adapter  *adapter = context;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        struct ifnet    *ifp;

        ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                if (igb_rxeof(rxr, adapter->rx_process_limit))
                        taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
                IGB_TX_LOCK(txr);
                igb_txeof(txr);

#if __FreeBSD_version >= 800000
                if (!drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr, NULL);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
        }

        igb_enable_intr(adapter);
}

static void
igb_handle_rx(void *context, int pending)
{
        struct rx_ring  *rxr = context;
        struct adapter  *adapter = rxr->adapter;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
                        /* More to clean, schedule another task */
                        taskqueue_enqueue(adapter->tq, &rxr->rx_task);
}

static void
igb_handle_tx(void *context, int pending)
{
        struct tx_ring  *txr = context;
        struct adapter  *adapter = txr->adapter;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#if __FreeBSD_version >= 800000
                if (!drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr, NULL);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
        }
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
        struct adapter  *adapter = arg;
        uint32_t        reg_icr;

        reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

        /* Hot eject?  */
        if (reg_icr == 0xffffffff)
                return FILTER_STRAY;

        /* Definitely not our interrupt.  */
        if (reg_icr == 0x0)
                return FILTER_STRAY;

        if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
                return FILTER_STRAY;

        /*
         * Mask interrupts until the taskqueue is finished running.  This is
         * cheap, just assume that it is needed.  This also works around the
         * MSI message reordering errata on certain systems.
         */
        igb_disable_intr(adapter);
        taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

        /* Link status change */
        if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                adapter->hw.mac.get_link_status = 1;
                igb_update_link_status(adapter);
        }

        if (reg_icr & E1000_ICR_RXO)
                adapter->rx_overruns++;
        return FILTER_HANDLED;
}

/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_tx(void *arg)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        u32             loop = IGB_MAX_LOOP;
        bool            more;

        ++txr->tx_irq;
        IGB_TX_LOCK(txr);

        do {
                more = igb_txeof(txr);
        } while (loop-- && more);

        IGB_TX_UNLOCK(txr);

        /* Schedule a clean task */
        taskqueue_enqueue(adapter->tq, &txr->tx_task);

        /* Reenable this interrupt */
        E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
        return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_rx(void *arg)
{
        struct rx_ring *rxr = arg;
        struct adapter *adapter = rxr->adapter;
        u32             loop = IGB_MAX_LOOP;
        bool            more;

        ++rxr->rx_irq;
        do {
                more = igb_rxeof(rxr, adapter->rx_process_limit);
        } while (loop-- && more);

        /* Update interrupt rate */
        if (igb_enable_aim == TRUE)
                igb_update_aim(rxr);

        /* Schedule another clean */
        taskqueue_enqueue(adapter->tq, &rxr->rx_task);

        /* Reenable this interrupt */
        E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
        return;
}

/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
        struct adapter  *adapter = arg;
        u32             icr;

        ++adapter->link_irq;
        icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
        if (!(icr & E1000_ICR_LSC))
                goto spurious;
        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

spurious:
        /* Rearm */
        E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
        E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
        return;
}
1491
1492 /*
1493 ** Routine to adjust the RX EITR value based on traffic;
1494 ** it's a simple three-state model, but it seems to help.
1495 **
1496 ** Note that the three EITR values are tunable via
1497 ** sysctl in real time. The feature can be effectively
1498 ** nullified by setting them equal.
1499 */
1500 #define BULK_THRESHOLD  10000
1501 #define AVE_THRESHOLD   1600 
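/*
** The thresholds are byte counts accumulated in rxr->bytes between
** adjustments (the counter is zeroed at the bottom of igb_update_aim),
** not absolute rates.
*/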
1502
1503 static void
1504 igb_update_aim(struct rx_ring *rxr)
1505 {
1506         struct adapter  *adapter = rxr->adapter;
1507         u32             olditr, newitr;
1508
1509         /* Update interrupt moderation based on traffic */
1510         olditr = rxr->eitr_setting;
1511         newitr = olditr;
1512
1513         /* Idle, don't change setting */
1514         if (rxr->bytes == 0)
1515                 return;
1516
1517         if (olditr == igb_low_latency) {
1518                 if (rxr->bytes > AVE_THRESHOLD)
1519                         newitr = igb_ave_latency;
1520         } else if (olditr == igb_ave_latency) {
1521                 if (rxr->bytes < AVE_THRESHOLD) 
1522                         newitr = igb_low_latency;
1523                 else if (rxr->bytes > BULK_THRESHOLD)
1524                         newitr = igb_bulk_latency;
1525         } else if (olditr == igb_bulk_latency) {
1526                 if (rxr->bytes < BULK_THRESHOLD)
1527                         newitr = igb_ave_latency;
1528         }
1529
1530         if (olditr != newitr) {
1531                 /* Change interrupt rate */
1532                 rxr->eitr_setting = newitr;
1533                 E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me),
1534                     newitr | (newitr << 16));
1535         }
1536
1537         rxr->bytes = 0;
1538         return;
1539 }
1540
1541
1542 /*********************************************************************
1543  *
1544  *  Media Ioctl callback
1545  *
1546  *  This routine is called whenever the user queries the status of
1547  *  the interface using ifconfig.
1548  *
1549  **********************************************************************/
1550 static void
1551 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1552 {
1553         struct adapter *adapter = ifp->if_softc;
1554         u_char fiber_type = IFM_1000_SX;
1555
1556         INIT_DEBUGOUT("igb_media_status: begin");
1557
1558         IGB_CORE_LOCK(adapter);
1559         igb_update_link_status(adapter);
1560
1561         ifmr->ifm_status = IFM_AVALID;
1562         ifmr->ifm_active = IFM_ETHER;
1563
1564         if (!adapter->link_active) {
1565                 IGB_CORE_UNLOCK(adapter);
1566                 return;
1567         }
1568
1569         ifmr->ifm_status |= IFM_ACTIVE;
1570
1571         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1572             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1573                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1574         else {
1575                 switch (adapter->link_speed) {
1576                 case 10:
1577                         ifmr->ifm_active |= IFM_10_T;
1578                         break;
1579                 case 100:
1580                         ifmr->ifm_active |= IFM_100_TX;
1581                         break;
1582                 case 1000:
1583                         ifmr->ifm_active |= IFM_1000_T;
1584                         break;
1585                 }
1586                 if (adapter->link_duplex == FULL_DUPLEX)
1587                         ifmr->ifm_active |= IFM_FDX;
1588                 else
1589                         ifmr->ifm_active |= IFM_HDX;
1590         }
1591         IGB_CORE_UNLOCK(adapter);
1592 }
1593
1594 /*********************************************************************
1595  *
1596  *  Media Ioctl callback
1597  *
1598  *  This routine is called when the user changes speed/duplex using
1599  *  the media/mediaopt options of ifconfig.
1600  *
1601  **********************************************************************/
1602 static int
1603 igb_media_change(struct ifnet *ifp)
1604 {
1605         struct adapter *adapter = ifp->if_softc;
1606         struct ifmedia  *ifm = &adapter->media;
1607
1608         INIT_DEBUGOUT("igb_media_change: begin");
1609
1610         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1611                 return (EINVAL);
1612
1613         IGB_CORE_LOCK(adapter);
1614         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1615         case IFM_AUTO:
1616                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1617                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1618                 break;
1619         case IFM_1000_LX:
1620         case IFM_1000_SX:
1621         case IFM_1000_T:
1622                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1623                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1624                 break;
1625         case IFM_100_TX:
1626                 adapter->hw.mac.autoneg = FALSE;
1627                 adapter->hw.phy.autoneg_advertised = 0;
1628                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1629                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1630                 else
1631                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1632                 break;
1633         case IFM_10_T:
1634                 adapter->hw.mac.autoneg = FALSE;
1635                 adapter->hw.phy.autoneg_advertised = 0;
1636                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1637                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1638                 else
1639                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1640                 break;
1641         default:
1642                 device_printf(adapter->dev, "Unsupported media type\n");
1643         }
1644
1645         /* As the speed/duplex settings may have changed,
1646          * we need to reset the PHY.
1647          */
1648         adapter->hw.phy.reset_disable = FALSE;
1649
1650         igb_init_locked(adapter);
1651         IGB_CORE_UNLOCK(adapter);
1652
1653         return (0);
1654 }
1655
1656
1657 /*********************************************************************
1658  *
1659  *  This routine maps the mbufs to the Advanced TX descriptors
1660  *  used by the 82575 adapter.
1661  *
1662  **********************************************************************/
1663
1664 static int
1665 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1666 {
1667         struct adapter          *adapter = txr->adapter;
1668         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1669         bus_dmamap_t            map;
1670         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1671         union e1000_adv_tx_desc *txd = NULL;
1672         struct mbuf             *m_head;
1673         u32                     olinfo_status = 0, cmd_type_len = 0;
1674         int                     nsegs, i, j, error, first, last = 0;
1675         u32                     hdrlen = 0;
1676
1677         m_head = *m_headp;
1678
1679
1680         /* Set basic descriptor constants */
1681         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1682         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1683         if (m_head->m_flags & M_VLANTAG)
1684                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1685
1686         /*
1687          * Force a cleanup if number of TX descriptors
1688          * available hits the threshold
1689          */
1690         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1691                 igb_txeof(txr);
1692                 /* Do we now at least have the minimum needed? */
1693                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1694                         txr->no_desc_avail++;
1695                         return (ENOBUFS);
1696                 }
1697         }
1698
1699         /*
1700          * Map the packet for DMA.
1701          *
1702          * Capture the first descriptor index,
1703          * this descriptor will have the index
1704          * of the EOP which is the only one that
1705          * now gets a DONE bit writeback.
1706          */
1707         first = txr->next_avail_desc;
1708         tx_buffer = &txr->tx_buffers[first];
1709         tx_buffer_mapped = tx_buffer;
1710         map = tx_buffer->map;
1711
1712         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1713             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1714
1715         if (error == EFBIG) {
1716                 struct mbuf *m;
1717
1718                 m = m_defrag(*m_headp, M_DONTWAIT);
1719                 if (m == NULL) {
1720                         adapter->mbuf_defrag_failed++;
1721                         m_freem(*m_headp);
1722                         *m_headp = NULL;
1723                         return (ENOBUFS);
1724                 }
1725                 *m_headp = m;
1726
1727                 /* Try it again */
1728                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1729                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1730
1731                 if (error == ENOMEM) {
1732                         adapter->no_tx_dma_setup++;
1733                         return (error);
1734                 } else if (error != 0) {
1735                         adapter->no_tx_dma_setup++;
1736                         m_freem(*m_headp);
1737                         *m_headp = NULL;
1738                         return (error);
1739                 }
1740         } else if (error == ENOMEM) {
1741                 adapter->no_tx_dma_setup++;
1742                 return (error);
1743         } else if (error != 0) {
1744                 adapter->no_tx_dma_setup++;
1745                 m_freem(*m_headp);
1746                 *m_headp = NULL;
1747                 return (error);
1748         }
1749
1750         /* Check again to be sure we have enough descriptors */
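        /* (The "- 2" leaves headroom for the context descriptor that a
         * hardware offload may consume below.) */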
1751         if (nsegs > (txr->tx_avail - 2)) {
1752                 txr->no_desc_avail++;
1753                 bus_dmamap_unload(txr->txtag, map);
1754                 return (ENOBUFS);
1755         }
1756         m_head = *m_headp;
1757
1758         /*
1759          * Set up the context descriptor:
1760          * used when any hardware offload is done.
1761          * This includes CSUM, VLAN, and TSO. It
1762          * will use the first descriptor.
1763          */
1764         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1765                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1766                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1767                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1768                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1769                 } else
1770                         return (ENXIO); 
1771         } else if (igb_tx_ctx_setup(txr, m_head))
1772                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1773
1774 #ifdef IGB_IEEE1588
1775         /* This is changing soon to an mtag detection: when the mbuf
1776          * is found to carry a TSTAMP mtag, E1000_ADVTXD_MAC_TSTAMP
1777          * should be OR'd into cmd_type_len here. */
1778 #endif
1779         /* Calculate payload length */
1780         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1781             << E1000_ADVTXD_PAYLEN_SHIFT);
1782
1783         /* Set up our transmit descriptors */
1784         i = txr->next_avail_desc;
1785         for (j = 0; j < nsegs; j++) {
1786                 bus_size_t seg_len;
1787                 bus_addr_t seg_addr;
1788
1789                 tx_buffer = &txr->tx_buffers[i];
1790                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1791                 seg_addr = segs[j].ds_addr;
1792                 seg_len  = segs[j].ds_len;
1793
1794                 txd->read.buffer_addr = htole64(seg_addr);
1795                 txd->read.cmd_type_len = htole32(
1796                     adapter->txd_cmd | cmd_type_len | seg_len);
1797                 txd->read.olinfo_status = htole32(olinfo_status);
1798                 last = i;
1799                 if (++i == adapter->num_tx_desc)
1800                         i = 0;
1801                 tx_buffer->m_head = NULL;
1802                 tx_buffer->next_eop = -1;
1803         }
1804
1805         txr->next_avail_desc = i;
1806         txr->tx_avail -= nsegs;
1807
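        /*
         * The DMA map we actually loaded came from the first buffer
         * slot; swap it onto the last (EOP) buffer so that unload and
         * m_freem() happen together when the packet completes.
         */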
1808         tx_buffer->m_head = m_head;
1809         tx_buffer_mapped->map = tx_buffer->map;
1810         tx_buffer->map = map;
1811         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1812
1813         /*
1814          * Last Descriptor of Packet
1815          * needs End Of Packet (EOP)
1816          * and Report Status (RS)
1817          */
1818         txd->read.cmd_type_len |=
1819             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1820         /*
1821          * Keep track in the first buffer which
1822          * descriptor will be written back
1823          */
1824         tx_buffer = &txr->tx_buffers[first];
1825         tx_buffer->next_eop = last;
1826
1827         /*
1828          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1829          * that this frame is available to transmit.
1830          */
1831         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1832             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1833         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1834         ++txr->tx_packets;
1835
1836         return (0);
1837
1838 }
1839
1840 static void
1841 igb_set_promisc(struct adapter *adapter)
1842 {
1843         struct ifnet    *ifp = adapter->ifp;
1844         uint32_t        reg_rctl;
1845
1846         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1847
1848         if (ifp->if_flags & IFF_PROMISC) {
1849                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1850                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1851         } else if (ifp->if_flags & IFF_ALLMULTI) {
1852                 reg_rctl |= E1000_RCTL_MPE;
1853                 reg_rctl &= ~E1000_RCTL_UPE;
1854                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1855         }
1856 }
1857
1858 static void
1859 igb_disable_promisc(struct adapter *adapter)
1860 {
1861         uint32_t        reg_rctl;
1862
1863         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1864
1865         reg_rctl &=  (~E1000_RCTL_UPE);
1866         reg_rctl &=  (~E1000_RCTL_MPE);
1867         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1868 }
1869
1870
1871 /*********************************************************************
1872  *  Multicast Update
1873  *
1874  *  This routine is called whenever multicast address list is updated.
1875  *
1876  **********************************************************************/
1877
1878 static void
1879 igb_set_multi(struct adapter *adapter)
1880 {
1881         struct ifnet    *ifp = adapter->ifp;
1882         struct ifmultiaddr *ifma;
1883         u32 reg_rctl = 0;
1884         u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1885
1886         int mcnt = 0;
1887
1888         IOCTL_DEBUGOUT("igb_set_multi: begin");
1889
1890         if_maddr_rlock(ifp);
1891         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1892                 if (ifma->ifma_addr->sa_family != AF_LINK)
1893                         continue;
1894
1895                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1896                         break;
1897
1898                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1899                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1900                 mcnt++;
1901         }
1902         if_maddr_runlock(ifp);
1903
1904         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1905                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1906                 reg_rctl |= E1000_RCTL_MPE;
1907                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1908         } else
1909                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1910 }
1911
1912
1913 /*********************************************************************
1914  *  Timer routine
1915  *
1916  *  This routine checks for link status and updates statistics.
1917  *
1918  **********************************************************************/
1919
1920 static void
1921 igb_local_timer(void *arg)
1922 {
1923         struct adapter  *adapter = arg;
1924         struct ifnet    *ifp = adapter->ifp;
1925
1926         IGB_CORE_LOCK_ASSERT(adapter);
1927
1928         igb_update_link_status(adapter);
1929         igb_update_stats_counters(adapter);
1930
1931         if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1932                 igb_print_hw_stats(adapter);
1933
1934         /*
1935          * Each second we check the watchdog to 
1936          * protect against hardware hangs.
1937          */
1938         igb_watchdog(adapter);
1939
1940         /* Trigger an RX interrupt on all queues */
1941         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1942  
1943         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1944
1945 }
1946
1947 static void
1948 igb_update_link_status(struct adapter *adapter)
1949 {
1950         struct e1000_hw *hw = &adapter->hw;
1951         struct ifnet *ifp = adapter->ifp;
1952         device_t dev = adapter->dev;
1953         struct tx_ring *txr = adapter->tx_rings;
1954         u32 link_check = 0;
1955
1956         /* Get the cached link value or read for real */
1957         switch (hw->phy.media_type) {
1958         case e1000_media_type_copper:
1959                 if (hw->mac.get_link_status) {
1960                         /* Do the work to read phy */
1961                         e1000_check_for_link(hw);
1962                         link_check = !hw->mac.get_link_status;
1963                 } else
1964                         link_check = TRUE;
1965                 break;
1966         case e1000_media_type_fiber:
1967                 e1000_check_for_link(hw);
1968                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1969                                  E1000_STATUS_LU);
1970                 break;
1971         case e1000_media_type_internal_serdes:
1972                 e1000_check_for_link(hw);
1973                 link_check = adapter->hw.mac.serdes_has_link;
1974                 break;
1975         default:
1976         case e1000_media_type_unknown:
1977                 break;
1978         }
1979
1980         /* Now we check if a transition has happened */
1981         if (link_check && (adapter->link_active == 0)) {
1982                 e1000_get_speed_and_duplex(&adapter->hw, 
1983                     &adapter->link_speed, &adapter->link_duplex);
1984                 if (bootverbose)
1985                         device_printf(dev, "Link is up %d Mbps %s\n",
1986                             adapter->link_speed,
1987                             ((adapter->link_duplex == FULL_DUPLEX) ?
1988                             "Full Duplex" : "Half Duplex"));
1989                 adapter->link_active = 1;
1990                 ifp->if_baudrate = adapter->link_speed * 1000000;
1991                 if_link_state_change(ifp, LINK_STATE_UP);
1992         } else if (!link_check && (adapter->link_active == 1)) {
1993                 ifp->if_baudrate = adapter->link_speed = 0;
1994                 adapter->link_duplex = 0;
1995                 if (bootverbose)
1996                         device_printf(dev, "Link is Down\n");
1997                 adapter->link_active = 0;
1998                 if_link_state_change(ifp, LINK_STATE_DOWN);
1999                 /* Turn off watchdogs */
2000                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2001                         txr->watchdog_timer = FALSE;
2002         }
2003 }
2004
2005 /*********************************************************************
2006  *
2007  *  This routine disables all traffic on the adapter by issuing a
2008  *  global reset on the MAC and deallocates TX/RX buffers.
2009  *
2010  **********************************************************************/
2011
2012 static void
2013 igb_stop(void *arg)
2014 {
2015         struct adapter  *adapter = arg;
2016         struct ifnet    *ifp = adapter->ifp;
2017
2018         IGB_CORE_LOCK_ASSERT(adapter);
2019
2020         INIT_DEBUGOUT("igb_stop: begin");
2021
2022         igb_disable_intr(adapter);
2023
2024         callout_stop(&adapter->timer);
2025
2026         /* Tell the stack that the interface is no longer active */
2027         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2028
2029         e1000_reset_hw(&adapter->hw);
2030         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2031 }
2032
2033
2034 /*********************************************************************
2035  *
2036  *  Determine hardware revision.
2037  *
2038  **********************************************************************/
2039 static void
2040 igb_identify_hardware(struct adapter *adapter)
2041 {
2042         device_t dev = adapter->dev;
2043
2044         /* Make sure our PCI config space has the necessary stuff set */
2045         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2046         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2047             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2048                 device_printf(dev, "Memory Access and/or Bus Master bits "
2049                     "were not set!\n");
2050                 adapter->hw.bus.pci_cmd_word |=
2051                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2052                 pci_write_config(dev, PCIR_COMMAND,
2053                     adapter->hw.bus.pci_cmd_word, 2);
2054         }
2055
2056         /* Save off the information about this board */
2057         adapter->hw.vendor_id = pci_get_vendor(dev);
2058         adapter->hw.device_id = pci_get_device(dev);
2059         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2060         adapter->hw.subsystem_vendor_id =
2061             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2062         adapter->hw.subsystem_device_id =
2063             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2064
2065         /* Do Shared Code Init and Setup */
2066         if (e1000_set_mac_type(&adapter->hw)) {
2067                 device_printf(dev, "Setup init failure\n");
2068                 return;
2069         }
2070 }
2071
2072 static int
2073 igb_allocate_pci_resources(struct adapter *adapter)
2074 {
2075         device_t        dev = adapter->dev;
2076         int             rid;
2077
2078         rid = PCIR_BAR(0);
2079         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2080             &rid, RF_ACTIVE);
2081         if (adapter->pci_mem == NULL) {
2082                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2083                 return (ENXIO);
2084         }
2085         adapter->osdep.mem_bus_space_tag =
2086             rman_get_bustag(adapter->pci_mem);
2087         adapter->osdep.mem_bus_space_handle =
2088             rman_get_bushandle(adapter->pci_mem);
2089         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2090
2091         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2092
2093         /* This will setup either MSI/X or MSI */
2094         adapter->msix = igb_setup_msix(adapter);
2095         adapter->hw.back = &adapter->osdep;
2096
2097         return (0);
2098 }
2099
2100 /*********************************************************************
2101  *
2102  *  Setup the Legacy or MSI Interrupt handler
2103  *
2104  **********************************************************************/
2105 static int
2106 igb_allocate_legacy(struct adapter *adapter)
2107 {
2108         device_t dev = adapter->dev;
2109         int error, rid = 0;
2110
2111         /* Turn off all interrupts */
2112         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2113
2114         /* MSI RID is 1 */
2115         if (adapter->msix == 1)
2116                 rid = 1;
2117
2118         /* We allocate a single interrupt resource */
2119         adapter->res = bus_alloc_resource_any(dev,
2120             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2121         if (adapter->res == NULL) {
2122                 device_printf(dev, "Unable to allocate bus resource: "
2123                     "interrupt\n");
2124                 return (ENXIO);
2125         }
2126
2127         /*
2128          * Try allocating a fast interrupt and the associated deferred
2129          * processing contexts.
2130          */
2131         TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2132         adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2133             taskqueue_thread_enqueue, &adapter->tq);
2134         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2135             device_get_nameunit(adapter->dev));
2136         if ((error = bus_setup_intr(dev, adapter->res,
2137             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2138             adapter, &adapter->tag)) != 0) {
2139                 device_printf(dev, "Failed to register fast interrupt "
2140                             "handler: %d\n", error);
2141                 taskqueue_free(adapter->tq);
2142                 adapter->tq = NULL;
2143                 return (error);
2144         }
2145
2146         return (0);
2147 }
2148
2149
2150 /*********************************************************************
2151  *
2152  *  Setup the MSIX Interrupt handlers
2153  *
2154  **********************************************************************/
2155 static int
2156 igb_allocate_msix(struct adapter *adapter)
2157 {
2158         device_t dev = adapter->dev;
2159         struct tx_ring *txr = adapter->tx_rings;
2160         struct rx_ring *rxr = adapter->rx_rings;
2161         int error, rid, vector = 0;
2162
2163         /*
2164          * Setup the interrupt handlers
2165          */
2166
2167         /* TX Setup */
2168         for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
2169                 rid = vector + 1;
2170                 txr->res = bus_alloc_resource_any(dev,
2171                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2172                 if (txr->res == NULL) {
2173                         device_printf(dev,
2174                             "Unable to allocate bus resource: "
2175                             "MSIX TX Interrupt\n");
2176                         return (ENXIO);
2177                 }
2178                 error = bus_setup_intr(dev, txr->res,
2179                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2180                     igb_msix_tx, txr, &txr->tag);
2181                 if (error) {
2182                         txr->res = NULL;
2183                         device_printf(dev, "Failed to register TX handler\n");
2184                         return (error);
2185                 }
2186                 /* Make tasklet for deferred handling - one per queue */
2187                 TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2188                 txr->msix = vector;
2189                 if (adapter->hw.mac.type == e1000_82575)
2190                         txr->eims = E1000_EICR_TX_QUEUE0 << i;
2191                 else
2192                         txr->eims = 1 << vector;
2193                 /*
2194                 ** Bind the msix vector, and thus the
2195                 ** ring to the corresponding cpu.
2196                 */
2197                 if (adapter->num_queues > 1)
2198                         bus_bind_intr(dev, txr->res, i);
2199         }
2200
2201         /* RX Setup */
2202         for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) {
2203                 rid = vector + 1;
2204                 rxr->res = bus_alloc_resource_any(dev,
2205                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2206                 if (rxr->res == NULL) {
2207                         device_printf(dev,
2208                             "Unable to allocate bus resource: "
2209                             "MSIX RX Interrupt\n");
2210                         return (ENXIO);
2211                 }
2212                 error = bus_setup_intr(dev, rxr->res,
2213                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2214                     igb_msix_rx, rxr, &rxr->tag);
2215                 if (error) {
2216                         rxr->res = NULL;
2217                         device_printf(dev, "Failed to register RX handler\n");
2218                         return (error);
2219                 }
2220                 /* Make tasklet for deferred handling - one per queue */
2221                 TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2222                 rxr->msix = vector;
2223                 if (adapter->hw.mac.type == e1000_82575)
2224                         rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2225                 else
2226                         rxr->eims = 1 << vector;
2227                 /* Get a mask for local timer */
2228                 adapter->rx_mask |= rxr->eims;
2229                 /*
2230                 ** Bind the msix vector, and thus the
2231                 ** ring to the corresponding cpu.
2232                 ** Notice that this makes an RX/TX pair
2233                 ** bound to each CPU, limited by the MSIX
2234                 ** vectors.
2235                 */
2236                 if (adapter->num_queues > 1)
2237                         bus_bind_intr(dev, rxr->res, i);
2238         }
2239
2240         /* And Link */
2241         rid = vector + 1;
2242         adapter->res = bus_alloc_resource_any(dev,
2243             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2244         if (adapter->res == NULL) {
2245                 device_printf(dev,
2246                     "Unable to allocate bus resource: "
2247                     "MSIX Link Interrupt\n");
2248                 return (ENXIO);
2249         }
2250         if ((error = bus_setup_intr(dev, adapter->res,
2251             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2252             igb_msix_link, adapter, &adapter->tag)) != 0) {
2253                 device_printf(dev, "Failed to register Link handler\n");
2254                 return (error);
2255         }
2256         adapter->linkvec = vector;
2257         adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2258             taskqueue_thread_enqueue, &adapter->tq);
2259         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2260             device_get_nameunit(adapter->dev));
2261
2262         return (0);
2263 }
2264
2265
2266 static void
2267 igb_configure_queues(struct adapter *adapter)
2268 {
2269         struct  e1000_hw *hw = &adapter->hw;
2270         struct  tx_ring *txr;
2271         struct  rx_ring *rxr;
2272
2273         /* Turn on MSIX */
2274         /*
2275         ** The 82576 uses IVARs to route MSI/X
2276         ** interrupts; it's not very intuitive,
2277         ** so study the code carefully :)
2278         */
2279         if (adapter->hw.mac.type == e1000_82576) {
2280                 u32     ivar = 0;
2281                 /* First turn on the capability */
2282                 E1000_WRITE_REG(hw, E1000_GPIE,
2283                     E1000_GPIE_MSIX_MODE |
2284                     E1000_GPIE_EIAME |
2285                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
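                /*
                 * Each 32-bit IVAR register carries four 8-bit vector
                 * entries: RX(q) in byte 0, TX(q) in byte 1, RX(q+8)
                 * in byte 2 and TX(q+8) in byte 3, which is why the
                 * byte lane selected below depends on whether the
                 * queue number is under 8.
                 */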
2286                 /* RX */
2287                 for (int i = 0; i < adapter->num_queues; i++) {
2288                         u32 index = i & 0x7; /* Each IVAR has two entries */
2289                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2290                         rxr = &adapter->rx_rings[i];
2291                         if (i < 8) {
2292                                 ivar &= 0xFFFFFF00;
2293                                 ivar |= rxr->msix | E1000_IVAR_VALID;
2294                         } else {
2295                                 ivar &= 0xFF00FFFF;
2296                                 ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2297                         }
2298                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2299                         adapter->eims_mask |= rxr->eims;
2300                 }
2301                 /* TX */
2302                 for (int i = 0; i < adapter->num_queues; i++) {
2303                         u32 index = i & 0x7; /* Each IVAR has two entries */
2304                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2305                         txr = &adapter->tx_rings[i];
2306                         if (i < 8) {
2307                                 ivar &= 0xFFFF00FF;
2308                                 ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2309                         } else {
2310                                 ivar &= 0x00FFFFFF;
2311                                 ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2312                         }
2313                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2314                         adapter->eims_mask |= txr->eims;
2315                 }
2316
2317                 /* And for the link interrupt */
2318                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2319                 adapter->link_mask = 1 << adapter->linkvec;
2320                 adapter->eims_mask |= adapter->link_mask;
2321                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2322         } else
2323         { /* 82575 */
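                /*
                 * The 82575 instead routes MSI-X with per-vector
                 * bitmap registers (MSIXBM): each vector is assigned
                 * the EICR cause bits it services.
                 */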
2324                 int tmp;
2325
2326                 /* Enable MSI-X PBA support */
2327                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2328                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2329                 /* Auto-Mask interrupts upon ICR read. */
2330                 tmp |= E1000_CTRL_EXT_EIAME;
2331                 tmp |= E1000_CTRL_EXT_IRCA;
2332                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2333
2334                 /* TX */
2335                 for (int i = 0; i < adapter->num_queues; i++) {
2336                         txr = &adapter->tx_rings[i];
2337                         E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2338                             txr->eims);
2339                         adapter->eims_mask |= txr->eims;
2340                 }
2341
2342                 /* RX */
2343                 for (int i = 0; i < adapter->num_queues; i++) {
2344                         rxr = &adapter->rx_rings[i];
2345                         E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2346                             rxr->eims);
2347                         adapter->eims_mask |= rxr->eims;
2348                 }
2349
2350                 /* Link */
2351                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2352                     E1000_EIMS_OTHER);
2353                 adapter->link_mask |= E1000_EIMS_OTHER;
2354                 adapter->eims_mask |= adapter->link_mask;
2355         }
2356         return;
2357 }
2358
2359
2360 static void
2361 igb_free_pci_resources(struct adapter *adapter)
2362 {
2363         struct          tx_ring *txr = adapter->tx_rings;
2364         struct          rx_ring *rxr = adapter->rx_rings;
2365         device_t        dev = adapter->dev;
2366         int             rid;
2367
2368         /*
2369         ** There is a slight possibility of a failure mode
2370         ** in attach that will result in entering this function
2371         ** before interrupt resources have been initialized; in
2372         ** that case we do not want to execute the loops below.
2373         ** We can detect this reliably by the state of the
2374         ** adapter's res pointer.
2375         */
2376         if (adapter->res == NULL)
2377                 goto mem;
2378
2379         /*
2380          * First release all the TX/RX interrupt resources:
2381          */
2382         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2383                 rid = txr->msix + 1;
2384                 if (txr->tag != NULL) {
2385                         bus_teardown_intr(dev, txr->res, txr->tag);
2386                         txr->tag = NULL;
2387                 }
2388                 if (txr->res != NULL)
2389                         bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res);
2390         }
2391
2392         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
2393                 rid = rxr->msix + 1;
2394                 if (rxr->tag != NULL) {
2395                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2396                         rxr->tag = NULL;
2397                 }
2398                 if (rxr->res != NULL)
2399                         bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res);
2400         }
2401
2402         /* Clean the Legacy or Link interrupt last */
2403         if (adapter->linkvec) /* we are doing MSIX */
2404                 rid = adapter->linkvec + 1;
2405         else
2406                 rid = (adapter->msix != 0) ? 1 : 0;
2407
2408         if (adapter->tag != NULL) {
2409                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2410                 adapter->tag = NULL;
2411         }
2412         if (adapter->res != NULL)
2413                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2414
2415 mem:
2416         if (adapter->msix)
2417                 pci_release_msi(dev);
2418
2419         if (adapter->msix_mem != NULL)
2420                 bus_release_resource(dev, SYS_RES_MEMORY,
2421                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2422
2423         if (adapter->pci_mem != NULL)
2424                 bus_release_resource(dev, SYS_RES_MEMORY,
2425                     PCIR_BAR(0), adapter->pci_mem);
2426
2427 }
2428
2429 /*
2430  * Setup Either MSI/X or MSI
2431  */
2432 static int
2433 igb_setup_msix(struct adapter *adapter)
2434 {
2435         device_t dev = adapter->dev;
2436         int rid, want, queues, msgs;
2437
2438         /* First try MSI/X */
2439         rid = PCIR_BAR(IGB_MSIX_BAR);
2440         adapter->msix_mem = bus_alloc_resource_any(dev,
2441             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2442         if (!adapter->msix_mem) {
2443                 /* May not be enabled */
2444                 device_printf(adapter->dev,
2445                     "Unable to map MSIX table\n");
2446                 goto msi;
2447         }
2448
2449         msgs = pci_msix_count(dev); 
2450         if (msgs == 0) { /* system has msix disabled */
2451                 bus_release_resource(dev, SYS_RES_MEMORY,
2452                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2453                 adapter->msix_mem = NULL;
2454                 goto msi;
2455         }
2456
2457         /* Figure out a reasonable auto config value */
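        /* (Each queue consumes an RX and a TX vector, and one more is
         * reserved for link, so at most (msgs - 1) / 2 queues fit.) */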
2458         queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2459
2460         if (igb_num_queues == 0)
2461                 igb_num_queues = queues;
2462         /*
2463         ** Two vectors (RX/TX pair) per queue
2464         ** plus an additional for Link interrupt
2465         */
2466         want = (igb_num_queues * 2) + 1;
2467         if (msgs >= want)
2468                 msgs = want;
2469         else {
2470                 device_printf(adapter->dev,
2471                     "MSIX Configuration Problem, "
2472                     "%d vectors configured, but %d vectors wanted!\n",
2473                     msgs, want);
2474                 return (ENXIO);
2475         }
2476         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2477                 device_printf(adapter->dev,
2478                     "Using MSIX interrupts with %d vectors\n", msgs);
2479                 adapter->num_queues = igb_num_queues;
2480                 return (msgs);
2481         }
2482 msi:
2483         msgs = pci_msi_count(dev);
2484         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2485                 device_printf(adapter->dev, "Using MSI interrupt\n");
2486         return (msgs);
2487 }
2488
2489 /*********************************************************************
2490  *
2491  *  Initialize the hardware to a configuration
2492  *  as specified by the adapter structure.
2493  *
2494  **********************************************************************/
2495 static int
2496 igb_hardware_init(struct adapter *adapter)
2497 {
2498         device_t        dev = adapter->dev;
2499         u32             rx_buffer_size;
2500
2501         INIT_DEBUGOUT("igb_hardware_init: begin");
2502
2503         /* Issue a global reset */
2504         e1000_reset_hw(&adapter->hw);
2505
2506         /* Let the firmware know the OS is in control */
2507         igb_get_hw_control(adapter);
2508
2509         /*
2510          * These parameters control the automatic generation (Tx) and
2511          * response (Rx) to Ethernet PAUSE frames.
2512          * - High water mark should allow for at least two frames to be
2513          *   received after sending an XOFF.
2514          * - Low water mark works best when it is very near the high water mark.
2515          *   This allows the receiver to restart by sending XON when it has
2516          *   drained a bit. Here we use an arbitrary value of 1500 which will
2517          *   restart after one full frame is pulled from the buffer. There
2518          *   could be several smaller frames in the buffer and if so they will
2519          *   not trigger the XON until their total number reduces the buffer
2520          *   by 1500.
2521          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2522          */
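        /*
         * Illustrative numbers (assuming a 1518-byte max frame and a
         * 48 KB packet buffer): high_water = 48 * 1024 -
         * roundup2(1518, 1024) = 47104 bytes, and low_water =
         * 47104 - 1500 = 45604 bytes.
         */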
2523         if (adapter->hw.mac.type == e1000_82576)
2524                 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2525                     E1000_RXPBS) & 0xffff) << 10 );
2526         else
2527                 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2528                     E1000_PBA) & 0xffff) << 10 );
2529
2530         adapter->hw.fc.high_water = rx_buffer_size -
2531             roundup2(adapter->max_frame_size, 1024);
2532         adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2533
2534         adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2535         adapter->hw.fc.send_xon = TRUE;
2536
2537         /* Set flow control, using the tunable value if sane */
2538         if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2539                 adapter->hw.fc.requested_mode = igb_fc_setting;
2540         else
2541                 adapter->hw.fc.requested_mode = e1000_fc_none;
2542
2543         if (e1000_init_hw(&adapter->hw) < 0) {
2544                 device_printf(dev, "Hardware Initialization Failed\n");
2545                 return (EIO);
2546         }
2547
2548         e1000_check_for_link(&adapter->hw);
2549
2550         return (0);
2551 }
2552
2553 /*********************************************************************
2554  *
2555  *  Setup networking device structure and register an interface.
2556  *
2557  **********************************************************************/
2558 static void
2559 igb_setup_interface(device_t dev, struct adapter *adapter)
2560 {
2561         struct ifnet   *ifp;
2562
2563         INIT_DEBUGOUT("igb_setup_interface: begin");
2564
2565         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2566         if (ifp == NULL)
2567                 panic("%s: can not if_alloc()", device_get_nameunit(dev));
2568         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2569         ifp->if_mtu = ETHERMTU;
2570         ifp->if_init =  igb_init;
2571         ifp->if_softc = adapter;
2572         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2573         ifp->if_ioctl = igb_ioctl;
2574         ifp->if_start = igb_start;
2575 #if __FreeBSD_version >= 800000
2576         ifp->if_transmit = igb_mq_start;
2577         ifp->if_qflush = igb_qflush;
2578 #endif
2579         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2580         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2581         IFQ_SET_READY(&ifp->if_snd);
2582
2583         ether_ifattach(ifp, adapter->hw.mac.addr);
2584
2585         ifp->if_capabilities = ifp->if_capenable = 0;
2586
2587         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2588         ifp->if_capabilities |= IFCAP_TSO4;
2589         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2590         ifp->if_capenable = ifp->if_capabilities;
2591
2592         /*
2593          * Tell the upper layer(s) we support long frames.
2594          */
2595         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2596         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2597         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2598
2599         /*
2600          * Specify the media types supported by this adapter and register
2601          * callbacks to update media and link information
2602          */
2603         ifmedia_init(&adapter->media, IFM_IMASK,
2604             igb_media_change, igb_media_status);
2605         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2606             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2607                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2608                             0, NULL);
2609                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2610         } else {
2611                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2612                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2613                             0, NULL);
2614                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2615                             0, NULL);
2616                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2617                             0, NULL);
2618                 if (adapter->hw.phy.type != e1000_phy_ife) {
2619                         ifmedia_add(&adapter->media,
2620                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2621                         ifmedia_add(&adapter->media,
2622                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2623                 }
2624         }
2625         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2626         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2627 }
2628
2629
2630 /*
2631  * Manage DMA'able memory.
2632  */
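/*
 * bus_dmamap_load() callback: ring allocations use a single segment,
 * so simply capture its bus address for the caller.
 */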
2633 static void
2634 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2635 {
2636         if (error)
2637                 return;
2638         *(bus_addr_t *) arg = segs[0].ds_addr;
2639 }
2640
2641 static int
2642 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2643         struct igb_dma_alloc *dma, int mapflags)
2644 {
2645         int error;
2646
2647         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2648                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2649                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2650                                 BUS_SPACE_MAXADDR,      /* highaddr */
2651                                 NULL, NULL,             /* filter, filterarg */
2652                                 size,                   /* maxsize */
2653                                 1,                      /* nsegments */
2654                                 size,                   /* maxsegsize */
2655                                 0,                      /* flags */
2656                                 NULL,                   /* lockfunc */
2657                                 NULL,                   /* lockarg */
2658                                 &dma->dma_tag);
2659         if (error) {
2660                 device_printf(adapter->dev,
2661                     "%s: bus_dma_tag_create failed: %d\n",
2662                     __func__, error);
2663                 goto fail_0;
2664         }
2665
2666         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2667             BUS_DMA_NOWAIT, &dma->dma_map);
2668         if (error) {
2669                 device_printf(adapter->dev,
2670                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2671                     __func__, (uintmax_t)size, error);
2672                 goto fail_1;
2673         }
2674
2675         dma->dma_paddr = 0;
2676         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2677             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2678         if (error || dma->dma_paddr == 0) {
2679                 device_printf(adapter->dev,
2680                     "%s: bus_dmamap_load failed: %d\n",
2681                     __func__, error);
2682                 goto fail_3;
2683         }
2684
2685         return (0);
2686
2687 fail_3:
2688         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2689 fail_2:
2690         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2691         bus_dma_tag_destroy(dma->dma_tag);
2692 fail_0:
2693         dma->dma_map = NULL;
2694         dma->dma_tag = NULL;
2695
2696         return (error);
2697 }
2698
2699 static void
2700 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2701 {
2702         if (dma->dma_tag == NULL)
2703                 return;
2704         if (dma->dma_map != NULL) {
2705                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2706                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2707                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2708                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2709                 dma->dma_map = NULL;
2710         }
2711         bus_dma_tag_destroy(dma->dma_tag);
2712         dma->dma_tag = NULL;
2713 }
2714
2715
2716 /*********************************************************************
2717  *
2718  *  Allocate memory for the transmit and receive rings, and then
2719  *  the descriptors associated with each, called only once at attach.
2720  *
2721  **********************************************************************/
2722 static int
2723 igb_allocate_queues(struct adapter *adapter)
2724 {
2725         device_t dev = adapter->dev;
2726         struct tx_ring *txr;
2727         struct rx_ring *rxr;
2728         int rsize, tsize, error = E1000_SUCCESS;
2729         int txconf = 0, rxconf = 0;
2730
2731         /* First allocate the TX ring struct memory */
2732         if (!(adapter->tx_rings =
2733             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2734             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2735                 device_printf(dev, "Unable to allocate TX ring memory\n");
2736                 error = ENOMEM;
2737                 goto fail;
2738         }
2739         txr = adapter->tx_rings;
2740
2741         /* Next allocate the RX */
2742         if (!(adapter->rx_rings =
2743             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2744             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2745                 device_printf(dev, "Unable to allocate RX ring memory\n");
2746                 error = ENOMEM;
2747                 goto rx_fail;
2748         }
2749         rxr = adapter->rx_rings;
2750
2751         tsize = roundup2(adapter->num_tx_desc *
2752             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2753         /*
2754          * Now set up the TX queues, txconf is needed to handle the
2755          * possibility that things fail midcourse and we need to
2756          * undo memory gracefully
2757          */ 
2758         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2759                 /* Set up some basics */
2760                 txr = &adapter->tx_rings[i];
2761                 txr->adapter = adapter;
2762                 txr->me = i;
2763
2764                 /* Initialize the TX lock */
2765                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2766                     device_get_nameunit(dev), txr->me);
2767                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2768
2769                 if (igb_dma_malloc(adapter, tsize,
2770                         &txr->txdma, BUS_DMA_NOWAIT)) {
2771                         device_printf(dev,
2772                             "Unable to allocate TX Descriptor memory\n");
2773                         error = ENOMEM;
2774                         goto err_tx_desc;
2775                 }
2776                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2777                 bzero((void *)txr->tx_base, tsize);
2778
2779                 /* Now allocate transmit buffers for the ring */
2780                 if (igb_allocate_transmit_buffers(txr)) {
2781                         device_printf(dev,
2782                             "Critical Failure setting up transmit buffers\n");
2783                         error = ENOMEM;
2784                         goto err_tx_desc;
2785                 }
2786 #if __FreeBSD_version >= 800000
2787                 /* Allocate a buf ring */
2788                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2789                     M_WAITOK, &txr->tx_mtx);
2790 #endif
2791         }
2792
2793         /*
2794          * Next the RX queues...
2795          */ 
2796         rsize = roundup2(adapter->num_rx_desc *
2797             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2798         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2799                 rxr = &adapter->rx_rings[i];
2800                 rxr->adapter = adapter;
2801                 rxr->me = i;
2802
2803                 /* Initialize the RX lock */
2804                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2805                     device_get_nameunit(dev), rxr->me);
2806                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2807
2808                 if (igb_dma_malloc(adapter, rsize,
2809                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2810                         device_printf(dev,
2811                             "Unable to allocate RX Descriptor memory\n");
2812                         error = ENOMEM;
2813                         goto err_rx_desc;
2814                 }
2815                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2816                 bzero((void *)rxr->rx_base, rsize);
2817
2818                 /* Allocate receive buffers for the ring*/
2819                 if (igb_allocate_receive_buffers(rxr)) {
2820                         device_printf(dev,
2821                             "Critical Failure setting up receive buffers\n");
2822                         error = ENOMEM;
2823                         goto err_rx_desc;
2824                 }
2825         }
2826
2827         return (0);
2828
2829 err_rx_desc:
2830         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2831                 igb_dma_free(adapter, &rxr->rxdma);
2832 err_tx_desc:
2833         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2834                 igb_dma_free(adapter, &txr->txdma);
2835         free(adapter->rx_rings, M_DEVBUF);
2836 rx_fail:
2837         free(adapter->tx_rings, M_DEVBUF);
2838 fail:
2839         return (error);
2840 }
2841
2842 /*********************************************************************
2843  *
2844  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2845  *  the information needed to transmit a packet on the wire. This is
2846  *  called only once at attach, setup is done every reset.
2847  *
2848  **********************************************************************/
2849 static int
2850 igb_allocate_transmit_buffers(struct tx_ring *txr)
2851 {
2852         struct adapter *adapter = txr->adapter;
2853         device_t dev = adapter->dev;
2854         struct igb_tx_buffer *txbuf;
2855         int error, i;
2856
2857         /*
2858          * Setup DMA descriptor areas.
2859          */
2860         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
2861                                PAGE_SIZE, 0,            /* alignment, bounds */
2862                                BUS_SPACE_MAXADDR,       /* lowaddr */
2863                                BUS_SPACE_MAXADDR,       /* highaddr */
2864                                NULL, NULL,              /* filter, filterarg */
2865                                IGB_TSO_SIZE,            /* maxsize */
2866                                IGB_MAX_SCATTER,         /* nsegments */
2867                                PAGE_SIZE,               /* maxsegsize */
2868                                0,                       /* flags */
2869                                NULL,                    /* lockfunc */
2870                                NULL,                    /* lockfuncarg */
2871                                &txr->txtag))) {
                device_printf(dev, "Unable to allocate TX DMA tag\n");
2873                 goto fail;
2874         }
2875
2876         if (!(txr->tx_buffers =
2877             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2878             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2879                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2880                 error = ENOMEM;
2881                 goto fail;
2882         }
2883
2884         /* Create the descriptor buffer dma maps */
2885         txbuf = txr->tx_buffers;
2886         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2887                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2888                 if (error != 0) {
2889                         device_printf(dev, "Unable to create TX DMA map\n");
2890                         goto fail;
2891                 }
2892         }
2893
        return (0);
2895 fail:
        /* Free everything; this handles a failure mid-allocation */
2897         igb_free_transmit_structures(adapter);
2898         return (error);
2899 }
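
/*
 * Note on the TX tag above, for illustration: maxsize bounds a whole
 * mapping while nsegments/maxsegsize bound its pieces, so a TSO
 * payload of up to IGB_TSO_SIZE bytes can only be loaded if it fits
 * in at most IGB_MAX_SCATTER segments of at most PAGE_SIZE bytes each.
 */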
2900
2901 /*********************************************************************
2902  *
2903  *  Initialize a transmit ring.
2904  *
2905  **********************************************************************/
2906 static void
2907 igb_setup_transmit_ring(struct tx_ring *txr)
2908 {
2909         struct adapter *adapter = txr->adapter;
2910         struct igb_tx_buffer *txbuf;
2911         int i;
2912
2913         /* Clear the old descriptor contents */
2914         bzero((void *)txr->tx_base,
2915               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2916         /* Reset indices */
2917         txr->next_avail_desc = 0;
2918         txr->next_to_clean = 0;
2919
2920         /* Free any existing tx buffers. */
2921         txbuf = txr->tx_buffers;
2922         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2923                 if (txbuf->m_head != NULL) {
2924                         bus_dmamap_sync(txr->txtag, txbuf->map,
2925                             BUS_DMASYNC_POSTWRITE);
2926                         bus_dmamap_unload(txr->txtag, txbuf->map);
2927                         m_freem(txbuf->m_head);
2928                         txbuf->m_head = NULL;
2929                 }
2930                 /* clear the watch index */
2931                 txbuf->next_eop = -1;
2932         }
2933
2934         /* Set number of descriptors available */
2935         txr->tx_avail = adapter->num_tx_desc;
2936
2937         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2938             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2939
2940 }
2941
2942 /*********************************************************************
2943  *
2944  *  Initialize all transmit rings.
2945  *
2946  **********************************************************************/
2947 static void
2948 igb_setup_transmit_structures(struct adapter *adapter)
2949 {
2950         struct tx_ring *txr = adapter->tx_rings;
2951
2952         for (int i = 0; i < adapter->num_queues; i++, txr++)
2953                 igb_setup_transmit_ring(txr);
2954
2955         return;
2956 }
2957
2958 /*********************************************************************
2959  *
2960  *  Enable transmit unit.
2961  *
2962  **********************************************************************/
2963 static void
2964 igb_initialize_transmit_units(struct adapter *adapter)
2965 {
2966         struct tx_ring  *txr = adapter->tx_rings;
2967         u32             tctl, txdctl;
2968
        INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2970
2971         /* Setup the Base and Length of the Tx Descriptor Rings */
2972         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2973                 u64 bus_addr = txr->txdma.dma_paddr;
2974
2975                 E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2976                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2977                 E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2978                     (uint32_t)(bus_addr >> 32));
2979                 E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2980                     (uint32_t)bus_addr);
2981
2982                 /* Setup the HW Tx Head and Tail descriptor pointers */
2983                 E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2984                 E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2985
2986                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
2987                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2988                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2989
2990                 /* Setup Transmit Descriptor Base Settings */   
2991                 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2992
2993                 txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2994                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2995                 E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2996         }
2997
2998         /* Program the Transmit Control Register */
2999         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3000         tctl &= ~E1000_TCTL_CT;
3001         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3002                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3003
3004         e1000_config_collision_dist(&adapter->hw);
3005
3006         /* This write will effectively turn on the transmit unit. */
3007         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3008
3009 }
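
/*
 * Illustration (not driver code): the descriptor base is a 64-bit
 * bus address split across two 32-bit registers.  For a hypothetical
 * bus_addr of 0x00000001c345f000, TDBAH(i) receives 0x00000001 and
 * TDBAL(i) receives 0xc345f000:
 */
#if 0
        uint64_t bus_addr = 0x00000001c345f000ULL;
        uint32_t hi = (uint32_t)(bus_addr >> 32);       /* 0x00000001 */
        uint32_t lo = (uint32_t)bus_addr;               /* 0xc345f000 */
#endif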
3010
3011 /*********************************************************************
3012  *
3013  *  Free all transmit rings.
3014  *
3015  **********************************************************************/
3016 static void
3017 igb_free_transmit_structures(struct adapter *adapter)
3018 {
3019         struct tx_ring *txr = adapter->tx_rings;
3020
3021         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3022                 IGB_TX_LOCK(txr);
3023                 igb_free_transmit_buffers(txr);
3024                 igb_dma_free(adapter, &txr->txdma);
3025                 IGB_TX_UNLOCK(txr);
3026                 IGB_TX_LOCK_DESTROY(txr);
3027         }
3028         free(adapter->tx_rings, M_DEVBUF);
3029 }
3030
3031 /*********************************************************************
3032  *
3033  *  Free transmit ring related data structures.
3034  *
3035  **********************************************************************/
3036 static void
3037 igb_free_transmit_buffers(struct tx_ring *txr)
3038 {
3039         struct adapter *adapter = txr->adapter;
3040         struct igb_tx_buffer *tx_buffer;
3041         int             i;
3042
        INIT_DEBUGOUT("igb_free_transmit_buffers: begin");
3044
3045         if (txr->tx_buffers == NULL)
3046                 return;
3047
3048         tx_buffer = txr->tx_buffers;
3049         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3050                 if (tx_buffer->m_head != NULL) {
3051                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3052                             BUS_DMASYNC_POSTWRITE);
3053                         bus_dmamap_unload(txr->txtag,
3054                             tx_buffer->map);
3055                         m_freem(tx_buffer->m_head);
3056                         tx_buffer->m_head = NULL;
3057                         if (tx_buffer->map != NULL) {
3058                                 bus_dmamap_destroy(txr->txtag,
3059                                     tx_buffer->map);
3060                                 tx_buffer->map = NULL;
3061                         }
3062                 } else if (tx_buffer->map != NULL) {
3063                         bus_dmamap_unload(txr->txtag,
3064                             tx_buffer->map);
3065                         bus_dmamap_destroy(txr->txtag,
3066                             tx_buffer->map);
3067                         tx_buffer->map = NULL;
3068                 }
3069         }
3070 #if __FreeBSD_version >= 800000
3071         if (txr->br != NULL)
3072                 buf_ring_free(txr->br, M_DEVBUF);
3073 #endif
3074         if (txr->tx_buffers != NULL) {
3075                 free(txr->tx_buffers, M_DEVBUF);
3076                 txr->tx_buffers = NULL;
3077         }
3078         if (txr->txtag != NULL) {
3079                 bus_dma_tag_destroy(txr->txtag);
3080                 txr->txtag = NULL;
3081         }
3082         return;
3083 }
3084
3085 /**********************************************************************
3086  *
3087  *  Setup work for hardware segmentation offload (TSO) on
3088  *  adapters using advanced tx descriptors (82575)
3089  *
3090  **********************************************************************/
3091 static boolean_t
3092 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3093 {
3094         struct adapter *adapter = txr->adapter;
3095         struct e1000_adv_tx_context_desc *TXD;
3096         struct igb_tx_buffer        *tx_buffer;
3097         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3098         u32 mss_l4len_idx = 0;
3099         u16 vtag = 0;
3100         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3101         struct ether_vlan_header *eh;
3102         struct ip *ip;
3103         struct tcphdr *th;
3104
3105
3106         /*
3107          * Determine where frame payload starts.
3108          * Jump over vlan headers if already present
3109          */
3110         eh = mtod(mp, struct ether_vlan_header *);
3111         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3112                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3113         else
3114                 ehdrlen = ETHER_HDR_LEN;
3115
3116         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3117         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3118                 return FALSE;
3119
3120         /* Only supports IPV4 for now */
3121         ctxd = txr->next_avail_desc;
3122         tx_buffer = &txr->tx_buffers[ctxd];
3123         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3124
3125         ip = (struct ip *)(mp->m_data + ehdrlen);
3126         if (ip->ip_p != IPPROTO_TCP)
                return FALSE;   /* not TCP, cannot do TSO */
3128         ip->ip_sum = 0;
3129         ip_hlen = ip->ip_hl << 2;
3130         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3131         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3132             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3133         tcp_hlen = th->th_off << 2;
3134         /*
3135          * Calculate header length, this is used
3136          * in the transmit desc in igb_xmit
3137          */
3138         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3139
3140         /* VLAN MACLEN IPLEN */
3141         if (mp->m_flags & M_VLANTAG) {
3142                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3143                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3144         }
3145
3146         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3147         vlan_macip_lens |= ip_hlen;
3148         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3149
3150         /* ADV DTYPE TUCMD */
3151         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3152         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3153         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3154         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3155
3156         /* MSS L4LEN IDX */
3157         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3158         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3159         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3160
3161         TXD->seqnum_seed = htole32(0);
3162         tx_buffer->m_head = NULL;
3163         tx_buffer->next_eop = -1;
3164
3165         if (++ctxd == adapter->num_tx_desc)
3166                 ctxd = 0;
3167
3168         txr->tx_avail--;
3169         txr->next_avail_desc = ctxd;
3170         return TRUE;
3171 }
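
/*
 * Worked example for the hdrlen computation above, assuming standard
 * headers with no IP or TCP options: ehdrlen = 14 (Ethernet),
 * ip_hlen = 5 << 2 = 20, tcp_hlen = 5 << 2 = 20, so *hdrlen = 54.
 * With an 802.1Q tag present, ehdrlen becomes 18 and *hdrlen = 58.
 */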
3172
3173
3174 /*********************************************************************
3175  *
3176  *  Context Descriptor setup for VLAN or CSUM
3177  *
3178  **********************************************************************/
3179
3180 static bool
3181 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3182 {
3183         struct adapter *adapter = txr->adapter;
3184         struct e1000_adv_tx_context_desc *TXD;
3185         struct igb_tx_buffer        *tx_buffer;
3186         uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3187         struct ether_vlan_header *eh;
3188         struct ip *ip = NULL;
3189         struct ip6_hdr *ip6;
3190         int  ehdrlen, ctxd, ip_hlen = 0;
3191         u16     etype, vtag = 0;
3192         u8      ipproto = 0;
3193         bool    offload = TRUE;
3194
3195         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3196                 offload = FALSE;
3197
3198         ctxd = txr->next_avail_desc;
3199         tx_buffer = &txr->tx_buffers[ctxd];
3200         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3201
3202         /*
3203         ** In advanced descriptors the vlan tag must 
3204         ** be placed into the context descriptor, thus
3205         ** we need to be here just for that setup.
3206         */
3207         if (mp->m_flags & M_VLANTAG) {
3208                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3209                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3210         } else if (offload == FALSE)
3211                 return FALSE;
3212
3213         /*
3214          * Determine where frame payload starts.
3215          * Jump over vlan headers if already present,
3216          * helpful for QinQ too.
3217          */
3218         eh = mtod(mp, struct ether_vlan_header *);
3219         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3220                 etype = ntohs(eh->evl_proto);
3221                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3222         } else {
3223                 etype = ntohs(eh->evl_encap_proto);
3224                 ehdrlen = ETHER_HDR_LEN;
3225         }
3226
3227         /* Set the ether header length */
3228         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3229
3230         switch (etype) {
3231                 case ETHERTYPE_IP:
3232                         ip = (struct ip *)(mp->m_data + ehdrlen);
3233                         ip_hlen = ip->ip_hl << 2;
3234                         if (mp->m_len < ehdrlen + ip_hlen) {
3235                                 offload = FALSE;
3236                                 break;
3237                         }
3238                         ipproto = ip->ip_p;
3239                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3240                         break;
3241                 case ETHERTYPE_IPV6:
3242                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3243                         ip_hlen = sizeof(struct ip6_hdr);
3244                         if (mp->m_len < ehdrlen + ip_hlen)
3245                                 return (FALSE);
3246                         ipproto = ip6->ip6_nxt;
3247                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3248                         break;
3249                 default:
3250                         offload = FALSE;
3251                         break;
3252         }
3253
3254         vlan_macip_lens |= ip_hlen;
3255         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3256
3257         switch (ipproto) {
3258                 case IPPROTO_TCP:
3259                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3260                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3261                         break;
3262                 case IPPROTO_UDP:
3263                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3264                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3265                         break;
3266 #if __FreeBSD_version >= 800000
3267                 case IPPROTO_SCTP:
3268                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3269                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3270                         break;
3271 #endif
3272                 default:
3273                         offload = FALSE;
3274                         break;
3275         }
3276
3277         /* Now copy bits into descriptor */
3278         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3279         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3280         TXD->seqnum_seed = htole32(0);
3281         TXD->mss_l4len_idx = htole32(0);
3282
3283         tx_buffer->m_head = NULL;
3284         tx_buffer->next_eop = -1;
3285
3286         /* We've consumed the first desc, adjust counters */
3287         if (++ctxd == adapter->num_tx_desc)
3288                 ctxd = 0;
3289         txr->next_avail_desc = ctxd;
3290         --txr->tx_avail;
3291
3292         return (offload);
3293 }
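
/*
 * Illustration of the vlan_macip_lens packing above, assuming the
 * usual advanced context descriptor layout (IPLEN in bits 0-8,
 * MACLEN in bits 9-15, VLAN tag in bits 16-31): with ehdrlen = 14,
 * ip_hlen = 20 and no VLAN tag, the packed value is
 * (14 << 9) | 20 = 0x1c14.
 */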
3294
3295
3296 /**********************************************************************
3297  *
3298  *  Examine each tx_buffer in the used queue. If the hardware is done
3299  *  processing the packet then free associated resources. The
3300  *  tx_buffer is put back on the free queue.
3301  *
 *  A TRUE return means there is still work in the ring to clean;
 *  FALSE means the ring is empty.
3303  **********************************************************************/
3304 static bool
3305 igb_txeof(struct tx_ring *txr)
3306 {
3307         struct adapter  *adapter = txr->adapter;
3308         int first, last, done, num_avail;
3309         u32     cleaned = 0;
3310         struct igb_tx_buffer *tx_buffer;
3311         struct e1000_tx_desc   *tx_desc, *eop_desc;
3312         struct ifnet   *ifp = adapter->ifp;
3313
3314         IGB_TX_LOCK_ASSERT(txr);
3315
3316         if (txr->tx_avail == adapter->num_tx_desc)
3317                 return FALSE;
3318
3319         num_avail = txr->tx_avail;
3320         first = txr->next_to_clean;
3321         tx_desc = &txr->tx_base[first];
3322         tx_buffer = &txr->tx_buffers[first];
3323         last = tx_buffer->next_eop;
3324         eop_desc = &txr->tx_base[last];
3325
        /*
         * Get the index of the first descriptor AFTER the EOP
         * of the first packet; that way the inner while loop
         * below can use a simple equality test as its bound.
         */
3332         if (++last == adapter->num_tx_desc)
3333                 last = 0;
3334         done = last;
3335
3336         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3337             BUS_DMASYNC_POSTREAD);
3338
3339         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3340                 /* We clean the range of the packet */
3341                 while (first != done) {
3342                         tx_desc->upper.data = 0;
3343                         tx_desc->lower.data = 0;
3344                         tx_desc->buffer_addr = 0;
3345                         ++num_avail; ++cleaned;
3346
3347                         if (tx_buffer->m_head) {
3348                                 ifp->if_opackets++;
3349                                 bus_dmamap_sync(txr->txtag,
3350                                     tx_buffer->map,
3351                                     BUS_DMASYNC_POSTWRITE);
3352                                 bus_dmamap_unload(txr->txtag,
3353                                     tx_buffer->map);
3354
3355                                 m_freem(tx_buffer->m_head);
3356                                 tx_buffer->m_head = NULL;
3357                         }
3358                         tx_buffer->next_eop = -1;
3359
3360                         if (++first == adapter->num_tx_desc)
3361                                 first = 0;
3362
3363                         tx_buffer = &txr->tx_buffers[first];
3364                         tx_desc = &txr->tx_base[first];
3365                 }
3366                 /* See if we can continue to the next packet */
3367                 last = tx_buffer->next_eop;
3368                 if (last != -1) {
3369                         eop_desc = &txr->tx_base[last];
3370                         /* Get new done point */
3371                         if (++last == adapter->num_tx_desc) last = 0;
3372                         done = last;
3373                 } else
3374                         break;
3375         }
3376         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3377             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3378
3379         txr->next_to_clean = first;
3380
3381         /*
3382          * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3383          * that it is OK to send packets.
3384          * If there are no pending descriptors, clear the timeout. Otherwise,
3385          * if some descriptors have been freed, restart the timeout.
3386          */
3387         if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {                
3388                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3389                 /* All clean, turn off the timer */
3390                 if (num_avail == adapter->num_tx_desc) {
3391                         txr->watchdog_timer = 0;
3392                         txr->tx_avail = num_avail;
3393                         return FALSE;
3394                 }
3395         }
3396
3397         /* Some cleaned, reset the timer */
3398         if (cleaned)
3399                 txr->watchdog_timer = IGB_TX_TIMEOUT;
3400         txr->tx_avail = num_avail;
3401         return TRUE;
3402 }
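
/*
 * Worked example of the cleanup walk above: with num_tx_desc = 8,
 * next_to_clean = 6 and a packet whose EOP descriptor sits at index
 * 1, 'done' becomes 2 (one past the EOP, with wraparound), so the
 * inner loop clears slots 6, 7, 0 and 1 before checking whether the
 * next packet has also been written back by the hardware.
 */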
3403
3404
3405 /*********************************************************************
3406  *
 *  Set up descriptor buffer(s) from the system mbuf pools.
 *              i - the descriptor slot index within the ring
 *              clean - selects whether to refresh the header
 *                      mbuf, the packet mbuf, or both.
3411  *
3412  **********************************************************************/
3413 static int
3414 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3415 {
3416         struct adapter          *adapter = rxr->adapter;
3417         struct mbuf             *mh, *mp;
3418         bus_dma_segment_t       seg[2];
3419         bus_dmamap_t            map;
3420         struct igb_rx_buffer    *rx_buffer;
3421         int                     error, nsegs;
3422         int                     merr = 0;
3423
3424
3425         rx_buffer = &rxr->rx_buffers[i];
3426
3427         /* First get our header and payload mbuf */
3428         if (clean & IGB_CLEAN_HEADER) {
3429                 mh = m_gethdr(M_DONTWAIT, MT_DATA);
3430                 if (mh == NULL)
3431                         goto remap;
3432         } else  /* reuse */
3433                 mh = rxr->rx_buffers[i].m_head;
3434
3435         mh->m_len = MHLEN;
3436         mh->m_flags |= M_PKTHDR;
3437
3438         if (clean & IGB_CLEAN_PAYLOAD) {
3439                 mp = m_getjcl(M_DONTWAIT, MT_DATA,
3440                     M_PKTHDR, adapter->rx_mbuf_sz);
3441                 if (mp == NULL)
3442                         goto remap;
3443                 mp->m_len = adapter->rx_mbuf_sz;
3444                 mp->m_flags &= ~M_PKTHDR;
3445         } else {        /* reusing */
3446                 mp = rxr->rx_buffers[i].m_pack;
3447                 mp->m_len = adapter->rx_mbuf_sz;
3448                 mp->m_flags &= ~M_PKTHDR;
3449         }
3450         /*
3451         ** Need to create a chain for the following
3452         ** dmamap call at this point.
3453         */
3454         mh->m_next = mp;
3455         mh->m_pkthdr.len = mh->m_len + mp->m_len;
3456
3457         /* Get the memory mapping */
3458         error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3459             rxr->rx_spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3460         if (error != 0) {
                device_printf(adapter->dev,
                    "igb_get_buf: dmamap load failure - %d\n", error);
3462                 m_free(mh);
3463                 return (error);
3464         }
3465
3466         /* Unload old mapping and update buffer struct */
        if (rx_buffer->m_head != NULL)
                bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3469         map = rx_buffer->map;
3470         rx_buffer->map = rxr->rx_spare_map;
3471         rxr->rx_spare_map = map;
3472         rx_buffer->m_head = mh;
3473         rx_buffer->m_pack = mp;
3474         bus_dmamap_sync(rxr->rxtag,
3475             rx_buffer->map, BUS_DMASYNC_PREREAD);
3476
3477         /* Update descriptor */
3478         rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3479         rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3480
3481         return (0);
3482
        /*
        ** If we get here we ran out of mbuf resources, so we
        ** discard the incoming packet and reuse the existing
        ** mbufs on the next pass through the ring; to do so we
        ** must fix up the descriptor, whose address fields were
        ** clobbered by the writeback info.
        */
3491 remap:
3492         adapter->mbuf_header_failed++;
3493         merr = ENOBUFS;
3494         /* Is there a reusable buffer? */
3495         mh = rxr->rx_buffers[i].m_head;
3496         if (mh == NULL) /* Nope, init error */
3497                 return (merr);
3498         mp = rxr->rx_buffers[i].m_pack;
3499         if (mp == NULL) /* Nope, init error */
3500                 return (merr);
3501         /* Get our old mapping */
3502         rx_buffer = &rxr->rx_buffers[i];
3503         error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3504             rx_buffer->map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3505         if (error != 0) {
3506                 /* We really have a problem */
3507                 m_free(mh);
3508                 return (error);
3509         }
3510         /* Now fix the descriptor as needed */
3511         rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3512         rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3513         return (merr);
3514 }
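
/*
 * A minimal sketch of the spare-map swap used above, for illustration
 * only (tag, spare, slot, m, seg and nsegs are hypothetical locals,
 * not driver state): the new chain is loaded into a preallocated
 * spare map first, and only on success is it exchanged with the ring
 * slot's map, so a failed load never leaves the slot without a valid
 * mapping.
 */
#if 0
        bus_dmamap_t tmp;

        if (bus_dmamap_load_mbuf_sg(tag, spare, m, seg, &nsegs,
            BUS_DMA_NOWAIT) != 0)
                return (ENOBUFS);       /* slot still holds its old map */
        tmp = slot->map;
        slot->map = spare;
        spare = tmp;
#endif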
3515
3516
3517 /*********************************************************************
3518  *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
3523  *
3524  **********************************************************************/
3525 static int
3526 igb_allocate_receive_buffers(struct rx_ring *rxr)
3527 {
3528         struct  adapter         *adapter = rxr->adapter;
3529         device_t                dev = adapter->dev;
3530         struct igb_rx_buffer    *rxbuf;
3531         int                     i, bsize, error;
3532
3533         bsize = sizeof(struct igb_rx_buffer) * adapter->num_rx_desc;
3534         if (!(rxr->rx_buffers =
3535             (struct igb_rx_buffer *) malloc(bsize,
3536             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3537                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3538                 error = ENOMEM;
3539                 goto fail;
3540         }
3541
        /*
        ** The tag is sized to accommodate the largest buffer size
        ** with packet split (hence the two segments), even though
        ** the split may not always be used.
        */
        if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3548                                    PAGE_SIZE, 0,        /* alignment, bounds */
3549                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3550                                    BUS_SPACE_MAXADDR,   /* highaddr */
3551                                    NULL, NULL,          /* filter, filterarg */
3552                                    MJUM16BYTES,         /* maxsize */
3553                                    2,                   /* nsegments */
3554                                    MJUMPAGESIZE,        /* maxsegsize */
3555                                    0,                   /* flags */
3556                                    NULL,                /* lockfunc */
3557                                    NULL,                /* lockfuncarg */
3558                                    &rxr->rxtag))) {
3559                 device_printf(dev, "Unable to create RX DMA tag\n");
3560                 goto fail;
3561         }
3562
3563         /* Create the spare map (used by getbuf) */
3564         error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3565              &rxr->rx_spare_map);
3566         if (error) {
3567                 device_printf(dev,
3568                     "%s: bus_dmamap_create header spare failed: %d\n",
3569                     __func__, error);
3570                 goto fail;
3571         }
3572
        for (i = 0; i < adapter->num_rx_desc; i++) {
                rxbuf = &rxr->rx_buffers[i];
3575                 error = bus_dmamap_create(rxr->rxtag,
3576                     BUS_DMA_NOWAIT, &rxbuf->map);
3577                 if (error) {
3578                         device_printf(dev, "Unable to create RX DMA maps\n");
3579                         goto fail;
3580                 }
3581         }
3582
3583         return (0);
3584
3585 fail:
3586         /* Frees all, but can handle partial completion */
3587         igb_free_receive_structures(adapter);
3588         return (error);
3589 }
3590
3591 /*********************************************************************
3592  *
3593  *  Initialize a receive ring and its buffers.
3594  *
3595  **********************************************************************/
3596 static int
3597 igb_setup_receive_ring(struct rx_ring *rxr)
3598 {
3599         struct  adapter         *adapter;
3600         struct  ifnet           *ifp;
3601         device_t                dev;
3602         struct igb_rx_buffer    *rxbuf;
3603         struct lro_ctrl         *lro = &rxr->lro;
3604         int                     j, rsize;
3605
3606         adapter = rxr->adapter;
3607         dev = adapter->dev;
3608         ifp = adapter->ifp;
3609         rxr->lro_enabled = FALSE;
3610         rxr->hdr_split = FALSE;
3611
3612         /* Clear the ring contents */
3613         rsize = roundup2(adapter->num_rx_desc *
3614             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3615         bzero((void *)rxr->rx_base, rsize);
3616
3617         /*
3618         ** Free current RX buffer structures and their mbufs
3619         */
3620         for (int i = 0; i < adapter->num_rx_desc; i++) {
3621                 rxbuf = &rxr->rx_buffers[i];
3622                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3623                     BUS_DMASYNC_POSTREAD);
3624                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3625                 if (rxbuf->m_head) {
3626                         rxbuf->m_head->m_next = rxbuf->m_pack;
3627                         m_freem(rxbuf->m_head);
3628                 }
3629                 rxbuf->m_head = NULL;
3630                 rxbuf->m_pack = NULL;
3631         }
3632
3633         /* Next replenish the ring */
3634         for (j = 0; j < adapter->num_rx_desc; j++) {
3635                 if (igb_get_buf(rxr, j, IGB_CLEAN_BOTH) == ENOBUFS) {
3636                         rxr->rx_buffers[j].m_head = NULL;
3637                         rxr->rx_buffers[j].m_pack = NULL;
3638                         rxr->rx_base[j].read.hdr_addr = 0;
3639                         rxr->rx_base[j].read.pkt_addr = 0;
3640                         goto fail;
3641                 }
3642         }
3643
3644         /* Setup our descriptor indices */
3645         rxr->next_to_check = 0;
3646         rxr->last_cleaned = 0;
3647
3648         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3649             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3650
        /*
        ** Now set up the LRO interface; we also enable header
        ** split only when LRO is enabled, since header split
        ** on its own is often undesirable in these setups.
        */
3657         if (ifp->if_capenable & IFCAP_LRO) {
3658                 int err = tcp_lro_init(lro);
3659                 if (err) {
                        device_printf(dev, "LRO Initialization failed!\n");
3661                         goto fail;
3662                 }
3663                 INIT_DEBUGOUT("RX LRO Initialized\n");
3664                 rxr->lro_enabled = TRUE;
3665                 rxr->hdr_split = TRUE;
3666                 lro->ifp = adapter->ifp;
3667         }
3668
3669         return (0);
3670 fail:
3671         /*
3672          * We need to clean up any buffers allocated
3673          * so far, 'j' is the failing index.
3674          */
3675         for (int i = 0; i < j; i++) {
3676                 rxbuf = &rxr->rx_buffers[i];
                if (rxbuf->m_head != NULL) {
                        bus_dmamap_sync(rxr->rxtag, rxbuf->map,
                            BUS_DMASYNC_POSTREAD);
                        bus_dmamap_unload(rxr->rxtag, rxbuf->map);
                        /* This frees the chained m_pack as well */
                        m_freem(rxbuf->m_head);
                        rxbuf->m_head = NULL;
                        rxbuf->m_pack = NULL;
                }
3684         }
3685         return (ENOBUFS);
3686 }
3687
3688 /*********************************************************************
3689  *
3690  *  Initialize all receive rings.
3691  *
3692  **********************************************************************/
3693 static int
3694 igb_setup_receive_structures(struct adapter *adapter)
3695 {
3696         struct rx_ring *rxr = adapter->rx_rings;
3697         int i, j;
3698
3699         for (i = 0; i < adapter->num_queues; i++, rxr++)
3700                 if (igb_setup_receive_ring(rxr))
3701                         goto fail;
3702
3703         return (0);
fail:
        /*
         * Free the RX buffers allocated so far; we only handle
         * the rings that completed, as the failing ring cleaned
         * up after itself. 'i' is the index of the failed ring,
         * so rings 0 through i - 1 need unwinding.
         */
        rxr = adapter->rx_rings;
        for (--i; i >= 0; i--, rxr++) {
3713                 for (j = 0; j < adapter->num_rx_desc; j++) {
3714                         struct igb_rx_buffer *rxbuf;
3715                         rxbuf = &rxr->rx_buffers[j];
3716                         if (rxbuf->m_head != NULL) {
3717                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3718                                   BUS_DMASYNC_POSTREAD);
3719                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3720                                 m_freem(rxbuf->m_head);
3721                                 rxbuf->m_head = NULL;
3722                         }
3723                 }
3724         }
3725
3726         return (ENOBUFS);
3727 }
3728
3729 /*********************************************************************
3730  *
3731  *  Enable receive unit.
3732  *
3733  **********************************************************************/
3734 static void
3735 igb_initialize_receive_units(struct adapter *adapter)
3736 {
3737         struct rx_ring  *rxr = adapter->rx_rings;
3738         struct ifnet    *ifp = adapter->ifp;
3739         u32             rctl, rxcsum, psize, srrctl = 0;
3740
        INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3742
3743         /*
3744          * Make sure receives are disabled while setting
3745          * up the descriptor ring
3746          */
3747         rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3748         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3749
3750         /*
3751         ** Set up for header split
3752         */
3753         if (rxr->hdr_split) {
3754                 /* Use a standard mbuf for the header */
3755                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3756                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3757         } else
3758                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3759
3760         /*
3761         ** Set up for jumbo frames
3762         */
3763         if (ifp->if_mtu > ETHERMTU) {
3764                 rctl |= E1000_RCTL_LPE;
3765                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3766                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3767
3768                 /* Set maximum packet len */
3769                 psize = adapter->max_frame_size;
3770                 /* are we on a vlan? */
3771                 if (adapter->ifp->if_vlantrunk != NULL)
3772                         psize += VLAN_TAG_SIZE;
3773                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3774         } else {
3775                 rctl &= ~E1000_RCTL_LPE;
3776                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3777                 rctl |= E1000_RCTL_SZ_2048;
3778         }
3779
3780         /* Setup the Base and Length of the Rx Descriptor Rings */
3781         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3782                 u64 bus_addr = rxr->rxdma.dma_paddr;
3783                 u32 rxdctl;
3784
3785                 E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3786                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3787                 E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3788                     (uint32_t)(bus_addr >> 32));
3789                 E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3790                     (uint32_t)bus_addr);
3791                 E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3792                 /* Enable this Queue */
3793                 rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3794                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3795                 rxdctl &= 0xFFF00000;
3796                 rxdctl |= IGB_RX_PTHRESH;
3797                 rxdctl |= IGB_RX_HTHRESH << 8;
3798                 rxdctl |= IGB_RX_WTHRESH << 16;
3799                 E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3800         }
3801
3802         /*
3803         ** Setup for RX MultiQueue
3804         */
3805         rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
        if (adapter->num_queues > 1) {
3807                 u32 random[10], mrqc, shift = 0;
3808                 union igb_reta {
3809                         u32 dword;
3810                         u8  bytes[4];
3811                 } reta;
3812
3813                 arc4rand(&random, sizeof(random), 0);
3814                 if (adapter->hw.mac.type == e1000_82575)
3815                         shift = 6;
                /* Populate the 128-entry redirection table */
3817                 for (int i = 0; i < 128; i++) {
3818                         reta.bytes[i & 3] =
3819                             (i % adapter->num_queues) << shift;
3820                         if ((i & 3) == 3)
3821                                 E1000_WRITE_REG(&adapter->hw,
3822                                     E1000_RETA(i >> 2), reta.dword);
3823                 }
3824                 /* Now fill in hash table */
3825                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3826                 for (int i = 0; i < 10; i++)
3827                         E1000_WRITE_REG_ARRAY(&adapter->hw,
3828                             E1000_RSSRK(0), i, random[i]);
3829
3830                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3831                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
3832                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3833                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
                mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
                    E1000_MRQC_RSS_FIELD_IPV6_UDP);
                mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
                    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3838
3839                 E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3840
                /*
                ** NOTE: Receive Full-Packet Checksum Offload
                ** is mutually exclusive with Multiqueue. This
                ** is distinct from per-packet TCP/IP checksum
                ** offload, which still works.
                */
3847                 rxcsum |= E1000_RXCSUM_PCSD;
3848 #if __FreeBSD_version >= 800000
3849                 /* For SCTP Offload */
3850                 if ((adapter->hw.mac.type == e1000_82576)
3851                     && (ifp->if_capenable & IFCAP_RXCSUM))
3852                         rxcsum |= E1000_RXCSUM_CRCOFL;
3853 #endif
3854         } else {
3855                 /* Non RSS setup */
3856                 if (ifp->if_capenable & IFCAP_RXCSUM) {
3857                         rxcsum |= E1000_RXCSUM_IPPCSE;
3858 #if __FreeBSD_version >= 800000
3859                         if (adapter->hw.mac.type == e1000_82576)
3860                                 rxcsum |= E1000_RXCSUM_CRCOFL;
3861 #endif
3862                 } else
3863                         rxcsum &= ~E1000_RXCSUM_TUOFL;
3864         }
3865         E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3866
3867         /* Setup the Receive Control Register */
3868         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3869         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3870                    E1000_RCTL_RDMTS_HALF |
3871                    (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3872
3873         /* Make sure VLAN Filters are off */
3874         rctl &= ~E1000_RCTL_VFE;
3875         /* Don't store bad packets */
3876         rctl &= ~E1000_RCTL_SBP;
3877
3878         /* Enable Receives */
3879         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3880
3881         /*
3882          * Setup the HW Rx Head and Tail Descriptor Pointers
3883          *   - needs to be after enable
3884          */
3885         for (int i = 0; i < adapter->num_queues; i++) {
3886                 E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3887                 E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3888                      adapter->num_rx_desc - 1);
3889         }
3890         return;
3891 }
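
/*
 * Illustration of the RETA packing above: with two queues and a
 * shift of 0, the per-entry bytes cycle 0,1,0,1,... so each 4-byte
 * RETA register is written as 0x01000100 on a little-endian host;
 * on an 82575 (shift = 6) the same pattern becomes 0x40004000.
 */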
3892
3893 /*********************************************************************
3894  *
3895  *  Free receive rings.
3896  *
3897  **********************************************************************/
3898 static void
3899 igb_free_receive_structures(struct adapter *adapter)
3900 {
3901         struct rx_ring *rxr = adapter->rx_rings;
3902
3903         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3904                 struct lro_ctrl *lro = &rxr->lro;
3905                 igb_free_receive_buffers(rxr);
3906                 tcp_lro_free(lro);
3907                 igb_dma_free(adapter, &rxr->rxdma);
3908         }
3909
3910         free(adapter->rx_rings, M_DEVBUF);
3911 }
3912
3913 /*********************************************************************
3914  *
3915  *  Free receive ring data structures.
3916  *
3917  **********************************************************************/
3918 static void
3919 igb_free_receive_buffers(struct rx_ring *rxr)
3920 {
3921         struct adapter  *adapter = rxr->adapter;
3922         struct igb_rx_buffer *rx_buffer;
3923
        INIT_DEBUGOUT("igb_free_receive_buffers: begin");
3925
3926         if (rxr->rx_spare_map) {
3927                 bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3928                 rxr->rx_spare_map = NULL;
3929         }
3930
3931         /* Cleanup any existing buffers */
3932         if (rxr->rx_buffers != NULL) {
3933                 rx_buffer = &rxr->rx_buffers[0];
3934                 for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3935                         if (rx_buffer->m_head != NULL) {
3936                                 bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3937                                     BUS_DMASYNC_POSTREAD);
3938                                 bus_dmamap_unload(rxr->rxtag,
3939                                     rx_buffer->map);
3940                                 m_freem(rx_buffer->m_head);
3941                                 rx_buffer->m_head = NULL;
3942                         } else if (rx_buffer->map != NULL)
3943                                 bus_dmamap_unload(rxr->rxtag,
3944                                     rx_buffer->map);
3945                         if (rx_buffer->map != NULL) {
3946                                 bus_dmamap_destroy(rxr->rxtag,
3947                                     rx_buffer->map);
3948                                 rx_buffer->map = NULL;
3949                         }
3950                 }
3951         }
3952
3953         if (rxr->rx_buffers != NULL) {
3954                 free(rxr->rx_buffers, M_DEVBUF);
3955                 rxr->rx_buffers = NULL;
3956         }
3957
3958         if (rxr->rxtag != NULL) {
3959                 bus_dma_tag_destroy(rxr->rxtag);
3960                 rxr->rxtag = NULL;
3961         }
3962 }

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and hands data that has been
 *  DMA'd into host memory up to the network stack.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE if there is more to clean, FALSE otherwise.
 *********************************************************************/
3974 static bool
3975 igb_rxeof(struct rx_ring *rxr, int count)
3976 {
3977         struct adapter          *adapter = rxr->adapter;
3978         struct ifnet            *ifp;
3979         struct lro_ctrl         *lro = &rxr->lro;
3980         struct lro_entry        *queued;
3981         int                     i;
3982         u32                     staterr;
3983         union e1000_adv_rx_desc *cur;
3984
3985
3986         IGB_RX_LOCK(rxr);
3987         ifp = adapter->ifp;
3988         i = rxr->next_to_check;
3989         cur = &rxr->rx_base[i];
3990         staterr = cur->wb.upper.status_error;
3991
3992         if (!(staterr & E1000_RXD_STAT_DD)) {
3993                 IGB_RX_UNLOCK(rxr);
3994                 return FALSE;
3995         }
3996
3997         /* Sync the ring */
3998         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3999             BUS_DMASYNC_POSTREAD);
4000
4001         /* Main clean loop */
4002         while ((staterr & E1000_RXD_STAT_DD) &&
4003             (count != 0) &&
4004             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4005                 struct mbuf *sendmp, *mh, *mp;
4006                 u16 hlen, plen, hdr, ptype, len_adj, vtag;
4007                 u8 dopayload, accept_frame, eop;
4008  
4009                 accept_frame = 1;
4010                 hlen = plen = len_adj = vtag = 0;
4011                 sendmp = mh = mp = NULL;
4012                 ptype = (u16)(cur->wb.lower.lo_dword.data >> 4);
4013
4014                 /* Sync the buffers */
4015                 bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
4016                             BUS_DMASYNC_POSTREAD);
4017
4018                 /*
4019                 ** The way the hardware is configured to
4020                 ** split, it will ONLY use the header buffer
4021                 ** when header split is enabled, otherwise we
4022                 ** get normal behavior, ie, both header and
4023                 ** payload are DMA'd into the payload buffer.
4024                 **
4025                 ** The fmp test is to catch the case where a
4026                 ** packet spans multiple descriptors, in that
4027                 ** case only the first header is valid.
4028                 */
                if ((rxr->hdr_split) && (rxr->fmp == NULL)) {
4030                         hdr = le16toh(cur->
4031                             wb.lower.lo_dword.hs_rss.hdr_info);
4032                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4033                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4034                         if (hlen > IGB_HDR_BUF)
4035                                 hlen = IGB_HDR_BUF;
4036                         plen = le16toh(cur->wb.upper.length);
4037                         /* Handle the header mbuf */
4038                         mh = rxr->rx_buffers[i].m_head;
4039                         mh->m_len = hlen;
4040                         dopayload = IGB_CLEAN_HEADER;
                        /*
                        ** Get the payload length; this
                        ** can be zero for a small packet.
                        */
4046                         if (plen) {
4047                                 mp = rxr->rx_buffers[i].m_pack;
4048                                 mp->m_len = plen;
4049                                 mp->m_next = NULL;
4050                                 mp->m_flags &= ~M_PKTHDR;
4051                                 mh->m_next = mp;
4052                                 mh->m_flags |= M_PKTHDR;
4053                                 dopayload = IGB_CLEAN_BOTH;
4054                                 rxr->rx_split_packets++;
4055                         } else {  /* small packets */
4056                                 mh->m_flags &= ~M_PKTHDR;
4057                                 mh->m_next = NULL;
4058                         }
4059                 } else {
4060                         /*
4061                         ** Either no header split, or a
4062                         ** secondary piece of a fragmented
4063                         ** split packet.
4064                         */
4065                         mh = rxr->rx_buffers[i].m_pack;
4066                         mh->m_flags |= M_PKTHDR;
4067                         mh->m_len = le16toh(cur->wb.upper.length);
4068                         dopayload = IGB_CLEAN_PAYLOAD;
4069                 }
4070
4071                 if (staterr & E1000_RXD_STAT_EOP) {
4072                         count--;
4073                         eop = 1;
4074                         /*
4075                         ** Strip CRC and account for frag
4076                         */
4077                         if (mp) { 
4078                                 if (mp->m_len < ETHER_CRC_LEN) {
4079                                         /* a frag, how much is left? */
4080                                         len_adj = ETHER_CRC_LEN - mp->m_len;
4081                                         mp->m_len = 0;
4082                                 } else
4083                                         mp->m_len -= ETHER_CRC_LEN;
4084                         } else { /* not split */
4085                                 if (mh->m_len < ETHER_CRC_LEN) {
4086                                         len_adj = ETHER_CRC_LEN - mh->m_len;
4087                                         mh->m_len = 0;
4088                                 } else
4089                                         mh->m_len -= ETHER_CRC_LEN;
4090                         }
4091                 } else 
4092                         eop = 0;
4093
4094                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
4095                         accept_frame = 0;
#ifdef IGB_IEEE1588
                /*
                ** XXX: The Linux code below still needs to be
                ** converted before it can build here; keep it
                ** disabled so the option does not break compilation.
                */
#if 0
               if (unlikely(staterr & E1000_RXD_STAT_TS)) {
                       u64 regval;
                       u64 ns;
// Create an mtag and set it up
                       struct skb_shared_hwtstamps *shhwtstamps =
                               skb_hwtstamps(skb);

                       rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
                       "igb: no RX time stamp available for time stamped packet");
                       regval = rd32(E1000_RXSTMPL);
                       regval |= (u64)rd32(E1000_RXSTMPH) << 32;
// Do time conversion from the register
                       ns = timecounter_cyc2time(&adapter->clock, regval);
                       clocksync_update(&adapter->sync, ns);
                       memset(shhwtstamps, 0, sizeof(*shhwtstamps));
                       shhwtstamps->hwtstamp = ns_to_ktime(ns);
                       shhwtstamps->syststamp =
                               clocksync_hw2sys(&adapter->sync, ns);
               }
#endif  /* 0 */
#endif  /* IGB_IEEE1588 */
4119                 if (accept_frame) {
4120                         /*
4121                         ** get_buf will overwrite the writeback
4122                         ** descriptor so save the VLAN tag now.
4123                         */
4124                         vtag = le16toh(cur->wb.upper.vlan);
4125                         if (igb_get_buf(rxr, i, dopayload) != 0) {
4126                                 ifp->if_iqdrops++;
4127                                 goto discard;
4128                         }
4129                         /* Initial frame - setup */
4130                         if (rxr->fmp == NULL) {
4131                                 mh->m_flags |= M_PKTHDR;
4132                                 mh->m_pkthdr.len = mh->m_len;
4133                                 rxr->fmp = mh; /* Store the first mbuf */
4134                                 rxr->lmp = mh;
4135                                 if (mp) { /* Add payload if split */
4136                                         mh->m_pkthdr.len += mp->m_len;
4137                                         rxr->lmp = mh->m_next;
4138                                 }
4139                         } else {
4140                                 /* Chain mbuf's together */
4141                                 mh->m_flags &= ~M_PKTHDR;
4142                                 rxr->lmp->m_next = mh;
4143                                 rxr->lmp = rxr->lmp->m_next;
4144                                 rxr->fmp->m_pkthdr.len += mh->m_len;
4145                                 /* Adjust for CRC frag */
4146                                 if (len_adj) {
4147                                         rxr->lmp->m_len -= len_adj;
4148                                         rxr->fmp->m_pkthdr.len -= len_adj;
4149                                 }
4150                         }
4151
4152                         if (eop) {
                                /* PKTTYPE bit 6 marks an SCTP packet */
                                bool sctp = ((ptype & 0x40) != 0);
4154                                 rxr->fmp->m_pkthdr.rcvif = ifp;
4155                                 ifp->if_ipackets++;
4156                                 rxr->rx_packets++;
4157                                 /* capture data for AIM */
4158                                 rxr->bytes += rxr->fmp->m_pkthdr.len;
4159                                 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4160
4161                                 igb_rx_checksum(staterr, rxr->fmp, sctp);
4162                                 if (staterr & E1000_RXD_STAT_VP) {
4163                                         rxr->fmp->m_pkthdr.ether_vtag = vtag;
4164                                         rxr->fmp->m_flags |= M_VLANTAG;
4165                                 }
4166 #if __FreeBSD_version >= 800000
4167                                 rxr->fmp->m_pkthdr.flowid = curcpu;
4168                                 rxr->fmp->m_flags |= M_FLOWID;
4169 #endif
4170                                 sendmp = rxr->fmp;
4171                                 rxr->fmp = NULL;
4172                                 rxr->lmp = NULL;
4173                         }
4174                 } else {
4175                         ifp->if_ierrors++;
4176 discard:
4177                         /* Reuse loaded DMA map and just update mbuf chain */
4178                         if (hlen) {
4179                                 mh = rxr->rx_buffers[i].m_head;
4180                                 mh->m_len = MHLEN;
4181                                 mh->m_next = NULL;
4182                         }
4183                         mp = rxr->rx_buffers[i].m_pack;
4184                         mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4185                         mp->m_data = mp->m_ext.ext_buf;
4186                         mp->m_next = NULL;
4187                         if (adapter->max_frame_size <=
4188                             (MCLBYTES - ETHER_ALIGN))
4189                                 m_adj(mp, ETHER_ALIGN);
4190                         if (rxr->fmp != NULL) {
4191                                 /* handles the whole chain */
4192                                 m_freem(rxr->fmp);
4193                                 rxr->fmp = NULL;
4194                                 rxr->lmp = NULL;
4195                         }
4196                         sendmp = NULL;
4197                 }
4198
4199                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4200                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4201
4202                 rxr->last_cleaned = i; /* For updating tail */
4203
4204                 /* Advance our pointers to the next descriptor. */
4205                 if (++i == adapter->num_rx_desc)
4206                         i = 0;
4207  
4208                 /*
4209                 ** Note that we hold the RX lock through
4210                 ** the following call, so this ring's
4211                 ** next_to_check cannot change underneath us.
4212                 */
4213                 if (sendmp != NULL) {
4214                         /*
4215                         ** Send to the stack if:
4216                         **  - LRO not enabled, or
4217                         **  - no LRO resources, or
4218                         **  - LRO enqueue fails
4219                         */
4220                         if ((!rxr->lro_enabled) ||
4221                             ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0))))
4222                                 (*ifp->if_input)(ifp, sendmp);
4223                 }
4224
4225                 /* Get the next descriptor */
4226                 cur = &rxr->rx_base[i];
4227                 staterr = cur->wb.upper.status_error;
4228         }
4229         rxr->next_to_check = i;
4230
4231         /* Advance this ring's Receive Queue "Tail Pointer" (RDT). */
4232         E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4233
4234         /*
4235          * Flush any outstanding LRO work
4236          */
4237         while (!SLIST_EMPTY(&lro->lro_active)) {
4238                 queued = SLIST_FIRST(&lro->lro_active);
4239                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4240                 tcp_lro_flush(lro, queued);
4241         }
4242
4243         IGB_RX_UNLOCK(rxr);
4244
4245         /*
4246         ** Is there still cleaning to do?
4247         ** If so, schedule another interrupt.
4248         */
4249         if (staterr & E1000_RXD_STAT_DD) {
4250                 E1000_WRITE_REG(&adapter->hw, E1000_EICS, rxr->eims);
4251                 return TRUE;
4252         }
4253
4254         return FALSE;
4255 }
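
/*
** Editorial sketch (not part of the original driver): with header
** split enabled, igb_rxeof() above assembles each frame from (up to)
** two mbufs per descriptor, chained as:
**
**	fmp (M_PKTHDR, header mbuf) -> payload mbuf -> ... -> lmp
**
** fmp/lmp track the first and last mbuf so frames spanning several
** descriptors are chained until EOP, then handed to tcp_lro_rx() or
** if_input() as a single packet.
*/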
4256
4257
4258 /*********************************************************************
4259  *
4260  *  Verify that the hardware indicated that the checksum is valid.
4261  *  Inform the stack about the status of the checksum so that the
4262  *  stack doesn't spend time re-verifying it.
4263  *
4264  *********************************************************************/
4265 static void
4266 igb_rx_checksum(u32 staterr, struct mbuf *mp, bool sctp)
4267 {
4268         u16 status = (u16)staterr;
4269         u8  errors = (u8) (staterr >> 24);
4270
4271         /* Ignore Checksum bit is set */
4272         if (status & E1000_RXD_STAT_IXSM) {
4273                 mp->m_pkthdr.csum_flags = 0;
4274                 return;
4275         }
4276
4277         if (status & E1000_RXD_STAT_IPCS) {
4278                 /* Did it pass? */
4279                 if (!(errors & E1000_RXD_ERR_IPE)) {
4280                         /* IP Checksum Good */
4281                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4282                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4283                 } else
4284                         mp->m_pkthdr.csum_flags = 0;
4285         }
4286
4287         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4288                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4289 #if __FreeBSD_version >= 800000
4290                 if (sctp) /* reassign */
4291                         type = CSUM_SCTP_VALID;
4292 #endif
4293                 /* Did it pass? */
4294                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4295                         mp->m_pkthdr.csum_flags |= type;
4296                         if (!sctp)
4297                                 mp->m_pkthdr.csum_data = htons(0xffff);
4298                 }
4299         }
4300         return;
4301 }
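
/*
** Editorial sketch (not part of the original driver): how a
** stack-side consumer typically interprets the mbuf(9) checksum
** flags set above.  A minimal example under those conventions:
*/
static __unused int
igb_csum_example(struct mbuf *m)
{
	int flags = m->m_pkthdr.csum_flags;

	/* Hardware verified the IP header checksum and found it good. */
	if ((flags & (CSUM_IP_CHECKED | CSUM_IP_VALID)) ==
	    (CSUM_IP_CHECKED | CSUM_IP_VALID))
		; /* the stack may skip in_cksum() on the IP header */

	/*
	** With CSUM_PSEUDO_HDR also set, csum_data holds 0xffff and the
	** full TCP/UDP checksum (pseudo-header included) is known good.
	*/
	if ((flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
		return (m->m_pkthdr.csum_data ^ 0xffff); /* 0 == good */

	return (-1); /* software must verify */
}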
4302
4303 /*
4304  * This routine is run via a vlan
4305  * config EVENT.
4306  */
4307 static void
4308 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4309 {
4310         struct adapter  *adapter = ifp->if_softc;
4311         u32             index, bit;
4312
4313         if (ifp->if_softc !=  arg)   /* Not our event */
4314                 return;
4315
4316         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4317                 return;
4318
4319         index = (vtag >> 5) & 0x7F;
4320         bit = vtag & 0x1F;
4321         igb_shadow_vfta[index] |= (1 << bit);
4322         ++adapter->num_vlans;
4323         /* Re-init to load the changes */
4324         igb_init(adapter);
4325 }
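
/*
** Editorial sketch (not part of the original driver): the shadow
** VFTA is 128 32-bit words covering the 4096 possible VLAN IDs, so
** bits 11:5 of the tag select the word and bits 4:0 select the bit.
*/
static __unused void
igb_vfta_locate_example(u16 vtag, u32 *index, u32 *bit)
{
	*index = (vtag >> 5) & 0x7F;	/* which of the 128 words */
	*bit = vtag & 0x1F;		/* which bit within that word */
	/* e.g. vtag 100 -> index 3, bit 4 */
}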
4326
4327 /*
4328  * This routine is run via a vlan
4329  * unconfig EVENT.
4330  */
4331 static void
4332 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4333 {
4334         struct adapter  *adapter = ifp->if_softc;
4335         u32             index, bit;
4336
4337         if (ifp->if_softc !=  arg)
4338                 return;
4339
4340         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4341                 return;
4342
4343         index = (vtag >> 5) & 0x7F;
4344         bit = vtag & 0x1F;
4345         igb_shadow_vfta[index] &= ~(1 << bit);
4346         --adapter->num_vlans;
4347         /* Re-init to load the changes */
4348         igb_init(adapter);
4349 }
4350
4351 static void
4352 igb_setup_vlan_hw_support(struct adapter *adapter)
4353 {
4354         struct e1000_hw *hw = &adapter->hw;
4355         u32             reg;
4356
4357         /*
4358         ** We get here through init_locked, meaning
4359         ** a soft reset, which has already cleared
4360         ** the VFTA and other state; if no vlans
4361         ** have been registered, do nothing.
4362         */
4363         if (adapter->num_vlans == 0)
4364                 return;
4365
4366         /*
4367         ** A soft reset zeroes out the VFTA, so
4368         ** we need to repopulate it now.
4369         */
4370         for (int i = 0; i < IGB_VFTA_SIZE; i++)
4371                 if (igb_shadow_vfta[i] != 0)
4372                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4373                             i, igb_shadow_vfta[i]);
4374
4375         reg = E1000_READ_REG(hw, E1000_CTRL);
4376         reg |= E1000_CTRL_VME;
4377         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4378
4379         /* Enable the Filter Table */
4380         reg = E1000_READ_REG(hw, E1000_RCTL);
4381         reg &= ~E1000_RCTL_CFIEN;
4382         reg |= E1000_RCTL_VFE;
4383         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4384
4385         /* Update the frame size */
4386         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4387             adapter->max_frame_size + VLAN_TAG_SIZE);
4388 }
4389
4390 static void
4391 igb_enable_intr(struct adapter *adapter)
4392 {
4393         /* With RSS set up what to auto clear */
4394         if (adapter->msix_mem) {
4395                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4396                     adapter->eims_mask);
4397                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4398                     adapter->eims_mask);
4399                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4400                     adapter->eims_mask);
4401                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4402                     E1000_IMS_LSC);
4403         } else {
4404                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4405                     IMS_ENABLE_MASK);
4406         }
4407         E1000_WRITE_FLUSH(&adapter->hw);
4408
4409         return;
4410 }
4411
4412 static void
4413 igb_disable_intr(struct adapter *adapter)
4414 {
4415         if (adapter->msix_mem) {
4416                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4417                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4418         } 
4419         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4420         E1000_WRITE_FLUSH(&adapter->hw);
4421         return;
4422 }
4423
4424 /*
4425  * Bit of a misnomer: what this really means is
4426  * to enable OS management of the system, i.e.
4427  * to disable the special hardware management features.
4428  */
4429 static void
4430 igb_init_manageability(struct adapter *adapter)
4431 {
4432         if (adapter->has_manage) {
4433                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4434                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4435
4436                 /* disable hardware interception of ARP */
4437                 manc &= ~(E1000_MANC_ARP_EN);
4438
4439                 /* enable receiving management packets to the host */
4440                 manc |= E1000_MANC_EN_MNG2HOST;
4441                 manc2h |= 1 << 5;  /* Mng Port 623 */
4442                 manc2h |= 1 << 6;  /* Mng Port 664 */
4443                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4444                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4445         }
4446 }
4447
4448 /*
4449  * Give control back to hardware management
4450  * controller if there is one.
4451  */
4452 static void
4453 igb_release_manageability(struct adapter *adapter)
4454 {
4455         if (adapter->has_manage) {
4456                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4457
4458                 /* re-enable hardware interception of ARP */
4459                 manc |= E1000_MANC_ARP_EN;
4460                 manc &= ~E1000_MANC_EN_MNG2HOST;
4461
4462                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4463         }
4464 }
4465
4466 /*
4467  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4468  * For ASF and Pass Through versions of f/w this means that
4469  * the driver is loaded. 
4470  *
4471  */
4472 static void
4473 igb_get_hw_control(struct adapter *adapter)
4474 {
4475         u32 ctrl_ext;
4476
4477         /* Let firmware know the driver has taken over */
4478         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4479         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4480             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4481 }
4482
4483 /*
4484  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4485  * For ASF and Pass Through versions of f/w this means that the
4486  * driver is no longer loaded.
4487  *
4488  */
4489 static void
4490 igb_release_hw_control(struct adapter *adapter)
4491 {
4492         u32 ctrl_ext;
4493
4494         /* Let firmware take over control of h/w */
4495         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4496         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4497             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4498 }
4499
4500 static int
4501 igb_is_valid_ether_addr(uint8_t *addr)
4502 {
4503         char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
4504
4505         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4506                 return (FALSE);
4507         }
4508
4509         return (TRUE);
4510 }
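
/*
** Editorial sketch (not part of the original driver): a hypothetical
** caller validating the address read from NVM, e.g. at attach time.
*/
static __unused int
igb_check_nvm_addr_example(struct adapter *adapter)
{
	/* Reject multicast/broadcast and all-zero addresses. */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr))
		return (EINVAL);
	return (0);
}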
4511
4512
4513 /*
4514  * Enable PCI Wake On Lan capability
4515  */
4516 void
4517 igb_enable_wakeup(device_t dev)
4518 {
4519         u16     cap, status;
4520         u8      id;
4521
4522         /* First find the capabilities pointer */
4523         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4524         /* Read the capability ID found there */
4525         id = pci_read_config(dev, cap, 1);
4526         if (id != PCIY_PMG)     /* Something wrong */
4527                 return;
4528         /* OK, we have the power capabilities, so
4529            now get the status register */
4530         cap += PCIR_POWER_STATUS;
4531         status = pci_read_config(dev, cap, 2);
4532         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4533         pci_write_config(dev, cap, status, 2);
4534         return;
4535 }
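
/*
** Editorial sketch (not part of the original driver): the routine
** above assumes the PM capability is the first entry in the PCI
** capability list.  A more defensive variant could let the bus code
** walk the list via pci_find_extcap(9); a minimal sketch:
*/
static __unused void
igb_enable_wakeup_example(device_t dev)
{
	int     cap;
	u16     status;

	if (pci_find_extcap(dev, PCIY_PMG, &cap) != 0)
		return;         /* no power-management capability */
	status = pci_read_config(dev, cap + PCIR_POWER_STATUS, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, cap + PCIR_POWER_STATUS, status, 2);
}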
4536
4537
4538 /**********************************************************************
4539  *
4540  *  Update the board statistics counters.
4541  *
4542  **********************************************************************/
4543 static void
4544 igb_update_stats_counters(struct adapter *adapter)
4545 {
4546         struct ifnet   *ifp;
4547
4548         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4549            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4550                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4551                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4552         }
4553         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4554         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4555         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4556         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4557
4558         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4559         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4560         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4561         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4562         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4563         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4564         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4565         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4566         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4567         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4568         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4569         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4570         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4571         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4572         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4573         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4574         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4575         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4576         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4577         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4578
4579         /* For the 64-bit byte counters the low dword must be read first; */
4580         /* both registers clear on the read of the high dword (see the sketch below). */
4581
4582         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4583         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4584
4585         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4586         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4587         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4588         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4589         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4590
4591         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4592         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4593
4594         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4595         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4596         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4597         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4598         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4599         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4600         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4601         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4602         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4603         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4604
4605         adapter->stats.algnerrc += 
4606                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4607         adapter->stats.rxerrc += 
4608                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4609         adapter->stats.tncrs += 
4610                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4611         adapter->stats.cexterr += 
4612                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4613         adapter->stats.tsctc += 
4614                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4615         adapter->stats.tsctfc += 
4616                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4617         ifp = adapter->ifp;
4618
4619         ifp->if_collisions = adapter->stats.colc;
4620
4621         /* Rx Errors */
4622         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4623             adapter->stats.crcerrs + adapter->stats.algnerrc +
4624             adapter->stats.ruc + adapter->stats.roc +
4625             adapter->stats.mpc + adapter->stats.cexterr;
4626
4627         /* Tx Errors */
4628         ifp->if_oerrors = adapter->stats.ecol +
4629             adapter->stats.latecol + adapter->watchdog_events;
4630 }
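
/*
** Editorial sketch (not part of the original driver): per the comment
** in igb_update_stats_counters() above, a full 64-bit read of a
** good-octets counter fetches the low dword first, then the high
** dword (which clears the pair).  A minimal example, assuming the
** E1000_GORCL/E1000_GORCH register pair:
*/
static __unused u64
igb_read_gorc64_example(struct e1000_hw *hw)
{
	u64 count;

	count = E1000_READ_REG(hw, E1000_GORCL);
	count |= (u64)E1000_READ_REG(hw, E1000_GORCH) << 32;
	return (count);
}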
4631
4632
4633 /**********************************************************************
4634  *
4635  *  This routine is called only when igb_display_debug_stats is enabled.
4636  *  This routine provides a way to take a look at important statistics
4637  *  maintained by the driver and hardware.
4638  *
4639  **********************************************************************/
4640 static void
4641 igb_print_debug_info(struct adapter *adapter)
4642 {
4643         device_t dev = adapter->dev;
4644         struct rx_ring *rxr = adapter->rx_rings;
4645         struct tx_ring *txr = adapter->tx_rings;
4646         uint8_t *hw_addr = adapter->hw.hw_addr;
4647
4648         device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4649         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4650             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4651             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4652
4653 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4654         device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4655             E1000_READ_REG(&adapter->hw, E1000_IMS),
4656             E1000_READ_REG(&adapter->hw, E1000_EIMS));
4657 #endif
4658
4659         device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4660             ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4661             (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4662         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4663             adapter->hw.fc.high_water,
4664             adapter->hw.fc.low_water);
4665
4666         for (int i = 0; i < adapter->num_queues; i++, txr++) {
4667                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4668                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4669                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4670                 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4671                     txr->me, (long long)txr->no_desc_avail);
4672                 device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4673                     (long long)txr->tx_irq);
4674                 device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4675                     (long long)txr->tx_packets);
4676         }
4677
4678         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4679                 struct lro_ctrl *lro = &rxr->lro;
4680                 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4681                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4682                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4683                 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4684                     (long long)rxr->rx_packets);
4685                 device_printf(dev, "RX(%d) Split Packets = %lld\n", rxr->me,
4686                     (long long)rxr->rx_split_packets);
4687                 device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4688                     (long long)rxr->rx_bytes);
4689                 device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4690                     (long long)rxr->rx_irq);
4691                 device_printf(dev, "RX(%d) LRO Queued = %d\n",
4692                     rxr->me, lro->lro_queued);
4693                 device_printf(dev, "RX(%d) LRO Flushed = %d\n",
4694                     rxr->me, lro->lro_flushed);
4695         }
4696
4697         device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4698
4699         device_printf(dev, "Mbuf defrag failed = %ld\n",
4700             adapter->mbuf_defrag_failed);
4701         device_printf(dev, "Std mbuf header failed = %ld\n",
4702             adapter->mbuf_header_failed);
4703         device_printf(dev, "Std mbuf packet failed = %ld\n",
4704             adapter->mbuf_packet_failed);
4705         device_printf(dev, "Driver dropped packets = %ld\n",
4706             adapter->dropped_pkts);
4707         device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4708                 adapter->no_tx_dma_setup);
4709 }
4710
4711 static void
4712 igb_print_hw_stats(struct adapter *adapter)
4713 {
4714         device_t dev = adapter->dev;
4715
4716         device_printf(dev, "Excessive collisions = %lld\n",
4717             (long long)adapter->stats.ecol);
4718 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4719         device_printf(dev, "Symbol errors = %lld\n",
4720             (long long)adapter->stats.symerrs);
4721 #endif
4722         device_printf(dev, "Sequence errors = %lld\n",
4723             (long long)adapter->stats.sec);
4724         device_printf(dev, "Defer count = %lld\n",
4725             (long long)adapter->stats.dc);
4726         device_printf(dev, "Missed Packets = %lld\n",
4727             (long long)adapter->stats.mpc);
4728         device_printf(dev, "Receive No Buffers = %lld\n",
4729             (long long)adapter->stats.rnbc);
4730         /* RLEC is inaccurate on some hardware, calculate our own. */
4731         device_printf(dev, "Receive Length Errors = %lld\n",
4732             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4733         device_printf(dev, "Receive errors = %lld\n",
4734             (long long)adapter->stats.rxerrc);
4735         device_printf(dev, "Crc errors = %lld\n",
4736             (long long)adapter->stats.crcerrs);
4737         device_printf(dev, "Alignment errors = %lld\n",
4738             (long long)adapter->stats.algnerrc);
4739         /* On 82575 these are collision counts */
4740         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4741             (long long)adapter->stats.cexterr);
4742         device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4743         device_printf(dev, "watchdog timeouts = %ld\n",
4744             adapter->watchdog_events);
4745         device_printf(dev, "XON Rcvd = %lld\n",
4746             (long long)adapter->stats.xonrxc);
4747         device_printf(dev, "XON Xmtd = %lld\n",
4748             (long long)adapter->stats.xontxc);
4749         device_printf(dev, "XOFF Rcvd = %lld\n",
4750             (long long)adapter->stats.xoffrxc);
4751         device_printf(dev, "XOFF Xmtd = %lld\n",
4752             (long long)adapter->stats.xofftxc);
4753         device_printf(dev, "Good Packets Rcvd = %lld\n",
4754             (long long)adapter->stats.gprc);
4755         device_printf(dev, "Good Packets Xmtd = %lld\n",
4756             (long long)adapter->stats.gptc);
4757         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4758             (long long)adapter->stats.tsctc);
4759         device_printf(dev, "TSO Contexts Failed = %lld\n",
4760             (long long)adapter->stats.tsctfc);
4761 }
4762
4763 /**********************************************************************
4764  *
4765  *  This routine provides a way to dump out the adapter eeprom,
4766  *  often a useful debug/service tool. It only dumps the first
4767  *  32 words; the fields that matter live in that range.
4768  *
4769  **********************************************************************/
4770 static void
4771 igb_print_nvm_info(struct adapter *adapter)
4772 {
4773         u16     eeprom_data;
4774         int     i, j, row = 0;
4775
4776         /* It's a bit crude, but it gets the job done */
4777         printf("\nInterface EEPROM Dump:\n");
4778         printf("Offset\n0x0000  ");
4779         for (i = 0, j = 0; i < 32; i++, j++) {
4780                 if (j == 8) { /* Make the offset block */
4781                         j = 0; ++row;
4782                         printf("\n0x00%x0  ", row);
4783                 }
4784                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4785                 printf("%04x ", eeprom_data);
4786         }
4787         printf("\n");
4788 }
4789
4790 static int
4791 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4792 {
4793         struct adapter *adapter;
4794         int error;
4795         int result;
4796
4797         result = -1;
4798         error = sysctl_handle_int(oidp, &result, 0, req);
4799
4800         if (error || !req->newptr)
4801                 return (error);
4802
4803         if (result == 1) {
4804                 adapter = (struct adapter *)arg1;
4805                 igb_print_debug_info(adapter);
4806         }
4807         /*
4808          * This value will cause a hex dump of the
4809          * first 32 16-bit words of the EEPROM to
4810          * the screen.
4811          */
4812         if (result == 2) {
4813                 adapter = (struct adapter *)arg1;
4814                 igb_print_nvm_info(adapter);
4815         }
4816
4817         return (error);
4818 }
4819
4820
4821 static int
4822 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4823 {
4824         struct adapter *adapter;
4825         int error;
4826         int result;
4827
4828         result = -1;
4829         error = sysctl_handle_int(oidp, &result, 0, req);
4830
4831         if (error || !req->newptr)
4832                 return (error);
4833
4834         if (result == 1) {
4835                 adapter = (struct adapter *)arg1;
4836                 igb_print_hw_stats(adapter);
4837         }
4838
4839         return (error);
4840 }
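
/*
** Editorial note (not part of the original driver): both handlers
** above follow the standard sysctl(9) pattern: writing 1 to the OID
** triggers the dump.  Assuming the OID names registered at attach
** time, usage from userland would look like:
**
**	# sysctl dev.igb.0.debug=1	(debug info; 2 dumps the EEPROM)
**	# sysctl dev.igb.0.stats=1	(hardware statistics)
*/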
4841
4842 static void
4843 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4844         const char *description, int *limit, int value)
4845 {
4846         *limit = value;
4847         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4848             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4849             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4850 }
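
/*
** Editorial sketch (not part of the original driver): the helper
** above is the usual attach-time hook for the rx processing tunable.
** A hypothetical call of that shape (tunable name, softc field and
** default value are illustrative assumptions):
*/
static __unused void
igb_add_rx_process_limit_example(struct adapter *adapter)
{
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, 100);
}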
4851
4852 #ifdef IGB_IEEE1588
4853 /*
4854 ** igb_hwtstamp_ioctl - control hardware time stamping
4855 **
4856 ** Outgoing time stamping can be enabled and disabled. Play nice and
4857 ** disable it when requested, although it shouldn't cause any overhead
4858 ** when no packet needs it. At most one packet in the queue may be
4859 ** marked for time stamping, otherwise it would be impossible to tell
4860 ** for sure to which packet the hardware time stamp belongs.
4861 **
4862 ** Incoming time stamping has to be configured via the hardware
4863 ** filters. Not all combinations are supported, in particular event
4864 ** type has to be specified. Matching the kind of event packet is
4865 ** not supported, with the exception of "all V2 events regardless of
4866 ** layer 2 or 4".
4867 **
4868 */
4869 static int
4870 igb_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr)
4871 {
4872         struct e1000_hw *hw = &adapter->hw;
4873         struct hwtstamp_ctrl *config;
4874         u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4875         u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
4876         u32 tsync_rx_ctl_type = 0;
4877         u32 tsync_rx_cfg = 0;
4878         int is_l4 = 0;
4879         int is_l2 = 0;
4880         u16 port = 319; /* PTP */
4881         u32 regval;
4882
4883         config = (struct hwtstamp_ctrl *) ifr->ifr_data;
4884
4885         /* reserved for future extensions */
4886         if (config->flags)
4887                 return (EINVAL);
4888
4889         switch (config->tx_type) {
4890         case HWTSTAMP_TX_OFF:
4891                 tsync_tx_ctl_bit = 0;
4892                 break;
4893         case HWTSTAMP_TX_ON:
4894                 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4895                 break;
4896         default:
4897                 return (ERANGE);
4898         }
4899
4900         switch (config->rx_filter) {
4901         case HWTSTAMP_FILTER_NONE:
4902                 tsync_rx_ctl_bit = 0;
4903                 break;
4904         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
4905         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
4906         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
4907         case HWTSTAMP_FILTER_ALL:
4908                 /*
4909                  * register TSYNCRXCFG must be set, therefore it is not
4910                  * possible to time stamp both Sync and Delay_Req messages
4911                  * => fall back to time stamping all packets
4912                  */
4913                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
4914                 config->rx_filter = HWTSTAMP_FILTER_ALL;
4915                 break;
4916         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
4917                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4918                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
4919                 is_l4 = 1;
4920                 break;
4921         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
4922                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4923                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
4924                 is_l4 = 1;
4925                 break;
4926         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
4927         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
4928                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4929                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
4930                 is_l2 = 1;
4931                 is_l4 = 1;
4932                 config->rx_filter = HWTSTAMP_FILTER_SOME;
4933                 break;
4934         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
4935         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
4936                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4937                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
4938                 is_l2 = 1;
4939                 is_l4 = 1;
4940                 config->rx_filter = HWTSTAMP_FILTER_SOME;
4941                 break;
4942         case HWTSTAMP_FILTER_PTP_V2_EVENT:
4943         case HWTSTAMP_FILTER_PTP_V2_SYNC:
4944         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
4945                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
4946                 config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
4947                 is_l2 = 1;
4948                 break;
4949         default:
4950                 return (ERANGE);
4951         }
4952
4953         /* enable/disable TX */
4954         regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
4955         regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
4956         E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
4957
4958         /* enable/disable RX, define which PTP packets are time stamped */
4959         regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
4960         regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
4961         regval = (regval & ~0xE) | tsync_rx_ctl_type;
4962         E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
4963         E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
4964
4965         /*
4966          * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
4967          *                                          (Ethertype to filter on)
4968          * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4969          * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
4970          */
4971         E1000_WRITE_REG(hw, E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
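
	/*
	** Editorial note (not part of the original driver): the constant
	** above decomposes per the register layout described in the
	** preceding comment:
	**
	**	  0x88F7	PTP ethertype (bits 15:0)
	**	| (1 << 26)	filter enable	= 0x04000000
	**	| (1 << 30)	timestamping	= 0x40000000
	**	-------------------------------------------
	**	= 0x440088F7
	*/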
4972
4973         /* L4 Queue Filter[0]: only filter by source and destination port */
4974         E1000_WRITE_REG(hw, E1000_SPQF0, htons(port));
4975         E1000_WRITE_REG(hw, E1000_IMIREXT(0), is_l4 ?
4976              ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
4977         E1000_WRITE_REG(hw, E1000_IMIR(0), is_l4 ?
4978              (htons(port)
4979               | (0<<16) /* immediate interrupt disabled */
4980               | 0 /* (1<<17) bit cleared: do not bypass
4981                      destination port check */)
4982                 : 0);
4983         E1000_WRITE_REG(hw, E1000_FTQF0, is_l4 ?
4984              (0x11 /* UDP */
4985               | (1<<15) /* VF not compared */
4986               | (1<<27) /* Enable Timestamping */
4987               | (7<<28) /* only source port filter enabled,
4988                            source/target address and protocol
4989                            masked */)
4990              : ((1<<15) | (15<<28) /* all mask bits set = filter not
4991                                       enabled */));
4992
4993         wrfl();
4994
4995         adapter->hwtstamp_ctrl = config;
4996
4997         /* clear TX/RX time stamp registers, just to be sure */
4998         regval = E1000_READ_REG(hw, E1000_TXSTMPH);
4999         regval = E1000_READ_REG(hw, E1000_RXSTMPH);
5000
5001         return (0);
5002 }
5003
5004 /*
5005 ** igb_read_clock - read raw cycle counter (to be used by time counter)
5006 */
5007 static cycle_t igb_read_clock(const struct cyclecounter *tc)
5008 {
5009        struct adapter *adapter =
5010                container_of(tc, struct adapter, cycles);
5011        struct e1000_hw *hw = &adapter->hw;
5012        u64 stamp;
5013
5014        stamp =  E1000_READ_REG(hw, E1000_SYSTIML);
5015        stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << 32ULL;
5016
5017        return (stamp);
5018 }
5019
5020 #endif /* IGB_IEEE1588 */