/******************************************************************************

  Copyright (c) 2001-2009, Intel Corporation 
  All rights reserved.
  
  Redistribution and use in source and binary forms, with or without 
  modification, are permitted provided that the following conditions are met:
  
   1. Redistributions of source code must retain the above copyright notice, 
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright 
      notice, this list of conditions and the following disclaimer in the 
      documentation and/or other materials provided with the distribution.
  
   3. Neither the name of the Intel Corporation nor the names of its 
      contributors may be used to endorse or promote products derived from 
      this software without specific prior written permission.
  
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#ifdef IGB_IEEE1588
#include <sys/ieee1588.h>
#endif

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.7.3";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     igb_qflush(struct ifnet *);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_watchdog(struct adapter *);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static int      igb_hardware_init(struct adapter *);
static void     igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);
static bool     igb_rxeof(struct rx_ring *, int);
static void     igb_rx_checksum(u32, struct mbuf *, bool);
static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_print_hw_stats(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static int      igb_get_buf(struct rx_ring *, int, u8);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void     igb_print_debug_info(struct adapter *);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static int      igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);

static int      igb_irq_fast(void *);
static void     igb_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);
static void     igb_handle_rxtx(void *context, int pending);
static void     igb_handle_tx(void *context, int pending);
static void     igb_handle_rx(void *context, int pending);

/* These are MSIX only irq handlers */
static void     igb_msix_rx(void *);
static void     igb_msix_tx(void *);
static void     igb_msix_link(void *);

/* Adaptive Interrupt Moderation */
static void     igb_update_aim(struct rx_ring *);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        {0, 0}
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** These parameters are used in Adaptive
** Interrupt Moderation. The value is set
** into EITR and controls the interrupt
** frequency. A static scheme can be
** created by changing the assigned value
** of igb_ave_latency to the desired value,
** and then setting igb_enable_aim to FALSE.
** This will result in all EITR registers
** getting set to that value statically.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
static int igb_low_latency = IGB_LOW_LATENCY;
TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
static int igb_ave_latency = IGB_AVE_LATENCY;
TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
static int igb_bulk_latency = IGB_BULK_LATENCY;
TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);

/*
** This will autoconfigure based on the number
** of CPUs if set to 0. Only a matched pair of
** TX and RX rings is allowed.
*/
static int igb_num_queues = 1;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/*
** Shadow VFTA table; this is needed because
** the real filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];


/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on
 *  an adapter based on the PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_stats, "I", "Statistics");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
            &igb_fc_setting, 0, "Flow Control");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
            &igb_enable_aim, 1, "Interrupt Moderation");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
            &igb_low_latency, 1, "Low Latency");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
            &igb_ave_latency, 1, "Average Latency");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
            &igb_bulk_latency, 1, "Bulk Latency");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueue */
        igb_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            igb_rx_process_limit);

        /*
         * Validate number of transmit and receive descriptors. It
         * must not exceed hardware maximum, and must be a multiple
         * of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Now Initialize the hardware */
        if (igb_hardware_init(adapter)) {
                device_printf(dev, "Unable to initialize the hardware\n");
                error = EIO;
                goto err_late;
        }

        /*
        ** Configure Interrupts
        */
        if (adapter->msix > 1) /* MSIX */
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        igb_setup_interface(dev, adapter);

#ifdef IGB_IEEE1588
        /*
        ** Setup the timer: IEEE 1588 support
        */
        adapter->cycles.read = igb_read_clock;
        adapter->cycles.mask = (u64)-1;
        adapter->cycles.mult = 1;
        adapter->cycles.shift = IGB_TSYNC_SHIFT;
        E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
            IGB_TSYNC_CYCLE_TIME * IGB_TSYNC_SHIFT);
        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIML, 0x00000000);
        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIMH, 0xFF800000);

        /* JFV - this is not complete yet */
#endif

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);

        callout_drain(&adapter->timer);

        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING))
                igb_start(ifp);

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_timer = IGB_TX_TIMEOUT;
        }
}

/*
 * Legacy TX routine: called from the stack, it
 * always uses the first TX ring and spins for the
 * lock. Should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr;
        int             i = 0, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        txr = &adapter->tx_rings[i];

        if (IGB_TX_TRYLOCK(txr)) {
                err = igb_mq_start_locked(ifp, txr, m);
                IGB_TX_UNLOCK(txr);
        } else
                err = drbr_enqueue(ifp, txr->br, m);

        return (err);
}
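
/*
** Note on the TRYLOCK above: if the ring lock is contended the
** frame is simply left on the ring's buf_ring by drbr_enqueue();
** it is drained later by igb_mq_start_locked(), either by the
** current lock holder or from the TX taskqueue path.
*/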

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0;

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        if (m == NULL) /* Called by tasklet */
                goto process;

        /* If nothing queued go right to xmit */
        if (drbr_empty(ifp, txr->br)) {
                if ((err = igb_xmit(txr, &m)) != 0) {
                        if (m != NULL)
                                err = drbr_enqueue(ifp, txr->br, m);
                        return (err);
                } else {
                        /* Success, update stats */
                        drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
                        /* Send a copy of the frame to the BPF listener */
                        ETHER_BPF_MTAP(ifp, m);
                        /* Set the watchdog */
                        txr->watchdog_timer = IGB_TX_TIMEOUT;
                }

        } else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                return (err);

process:
        if (drbr_empty(ifp, txr->br))
                return (err);

        /* Process the queue */
        while (TRUE) {
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                next = drbr_dequeue(ifp, txr->br);
                if (next == NULL)
                        break;
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                /* Set the watchdog */
                txr->watchdog_timer = IGB_TX_TIMEOUT;
        }

        if (txr->tx_avail <= IGB_TX_OP_THRESHOLD)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting hardware takes a very long time
                         * and results in link renegotiation, we initialize
                         * the hardware only when it is absolutely required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                IGB_CORE_LOCK(adapter);
                                igb_init_locked(adapter);
                                IGB_CORE_UNLOCK(adapter);
                        }
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
                        igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

#ifdef IGB_IEEE1588
        /*
        ** IOCTL support for Precision Time (IEEE 1588) Support
        */
        case SIOCSHWTSTAMP:
                error = igb_hwtstamp_ioctl(adapter, ifp);
                break;
#endif

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
igb_watchdog(struct adapter *adapter)
{
        struct tx_ring  *txr = adapter->tx_rings;
        bool            tx_hang = FALSE;

        IGB_CORE_LOCK_ASSERT(adapter);

        /*
        ** The timer is set to 5 every time start() queues a packet.
        ** Then txeof keeps resetting it as long as it cleans at
        ** least one descriptor.
        ** Finally, anytime all descriptors are clean the timer is
        ** set to 0.
        **
        ** With TX multiqueue we need to check every queue's timer;
        ** if any one times out we do the reset.
        */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                if (txr->watchdog_timer == 0 ||
                    (--txr->watchdog_timer)) {
                        IGB_TX_UNLOCK(txr);
                        continue;
                } else {
                        tx_hang = TRUE;
                        IGB_TX_UNLOCK(txr);
                        break;
                }
        }
        if (tx_hang == FALSE)
                return;

        /* If we are in this routine because of pause frames, then
         * don't reset the hardware.
         */
        if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
            E1000_STATUS_TXOFF) {
                txr = adapter->tx_rings; /* reset pointer */
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
                        txr->watchdog_timer = IGB_TX_TIMEOUT;
                        IGB_TX_UNLOCK(txr);
                }
                return;
        }

        if (e1000_check_for_link(&adapter->hw) == 0)
                device_printf(adapter->dev, "watchdog timeout -- resetting\n");

        txr = adapter->tx_rings; /* reset pointer before dumping all queues */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
                    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
                    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
                device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
                    " Next Desc to Clean = %d\n", i, txr->tx_avail,
                    txr->next_to_clean);
        }

        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->watchdog_events++;

        igb_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct rx_ring *rxr = adapter->rx_rings;
        struct tx_ring *txr = adapter->tx_rings;
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        u32             pba = 0;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_stop(adapter);

        /*
         * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
         */
        if (adapter->hw.mac.type == e1000_82575) {
                pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
                INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
                E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
        }

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /* Initialize the hardware */
        if (igb_hardware_init(adapter)) {
                device_printf(dev, "Unable to initialize the hardware\n");
                return;
        }
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (ifp->if_mtu > ETHERMTU)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MCLBYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                igb_stop(adapter);
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* Set up VLAN tag offload and filter */
        igb_setup_vlan_hw_support(adapter);

        /* Set default RX interrupt moderation */
        for (int i = 0; i < adapter->num_queues; i++, rxr++) {
                E1000_WRITE_REG(&adapter->hw,
                    E1000_EITR(rxr->msix), igb_ave_latency);
                rxr->eitr_setting = igb_ave_latency;
        }

        /* Set TX interrupt rate & reset TX watchdog */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                E1000_WRITE_REG(&adapter->hw,
                    E1000_EITR(txr->msix), igb_ave_latency);
                txr->watchdog_timer = FALSE;
        }

        {
                /* this clears any pending interrupts */
                E1000_READ_REG(&adapter->hw, E1000_ICR);
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Don't reset the phy next time init gets called */
        adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_rxtx(void *context, int pending)
{
        struct adapter  *adapter = context;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        struct ifnet    *ifp;

        ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                if (igb_rxeof(rxr, adapter->rx_process_limit))
                        taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
                IGB_TX_LOCK(txr);
                igb_txeof(txr);

#if __FreeBSD_version >= 800000
                if (!drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr, NULL);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
        }

        igb_enable_intr(adapter);
}

static void
igb_handle_rx(void *context, int pending)
{
        struct rx_ring  *rxr = context;
        struct adapter  *adapter = rxr->adapter;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
                        /* More to clean, schedule another task */
                        taskqueue_enqueue(adapter->tq, &rxr->rx_task);
}

static void
igb_handle_tx(void *context, int pending)
{
        struct tx_ring  *txr = context;
        struct adapter  *adapter = txr->adapter;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#if __FreeBSD_version >= 800000
                if (!drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr, NULL);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
        }
}


/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
        struct adapter  *adapter = arg;
        uint32_t        reg_icr;

        reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

        /* Hot eject?  */
        if (reg_icr == 0xffffffff)
                return FILTER_STRAY;

        /* Definitely not our interrupt.  */
        if (reg_icr == 0x0)
                return FILTER_STRAY;

        if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
                return FILTER_STRAY;

        /*
         * Mask interrupts until the taskqueue is finished running.  This is
         * cheap, just assume that it is needed.  This also works around the
         * MSI message reordering errata on certain systems.
         */
        igb_disable_intr(adapter);
        taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

        /* Link status change */
        if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
                adapter->hw.mac.get_link_status = 1;
                igb_update_link_status(adapter);
        }

        if (reg_icr & E1000_ICR_RXO)
                adapter->rx_overruns++;
        return FILTER_HANDLED;
}
1407
1408
1409 /*********************************************************************
1410  *
1411  *  MSIX TX Interrupt Service routine
1412  *
1413  **********************************************************************/
1414 static void
1415 igb_msix_tx(void *arg)
1416 {
1417         struct tx_ring *txr = arg;
1418         struct adapter *adapter = txr->adapter;
1419         u32             loop = IGB_MAX_LOOP;
1420         bool            more;
1421
1422         ++txr->tx_irq;
1423         IGB_TX_LOCK(txr);
1424
1425         do {
1426                 more = igb_txeof(txr);
1427         } while (loop-- && more);
1428
1429         IGB_TX_UNLOCK(txr);
1430
1431         /* Schedule a clean task */
1432         taskqueue_enqueue(adapter->tq, &txr->tx_task);
1433
1434         /* Reenable this interrupt */
1435         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
1436         return;
1437 }
1438
1439 /*********************************************************************
1440  *
1441  *  MSIX RX Interrupt Service routine
1442  *
1443  **********************************************************************/
1444
1445 static void
1446 igb_msix_rx(void *arg)
1447 {
1448         struct rx_ring *rxr = arg;
1449         struct adapter *adapter = rxr->adapter;
1450         u32             loop = IGB_MAX_LOOP;
1451         bool            more;
1452
1453         ++rxr->rx_irq;
1454         do {
1455                 more = igb_rxeof(rxr, adapter->rx_process_limit);
1456         } while (loop-- && more);
1457
1458         /* Update interrupt rate */
1459         if (igb_enable_aim == TRUE)
1460                 igb_update_aim(rxr);
1461
1462         /* Schedule another clean */
1463         taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1464
1465         /* Reenable this interrupt */
1466         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
1467         return;
1468 }
1469
1470
1471 /*********************************************************************
1472  *
1473  *  MSIX Link Interrupt Service routine
1474  *
1475  **********************************************************************/
1476
1477 static void
1478 igb_msix_link(void *arg)
1479 {
1480         struct adapter  *adapter = arg;
1481         u32             icr;
1482
1483         ++adapter->link_irq;
1484         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485         if (!(icr & E1000_ICR_LSC))
1486                 goto spurious;
1487         adapter->hw.mac.get_link_status = 1;
1488         igb_update_link_status(adapter);
1489
1490 spurious:
1491         /* Rearm */
1492         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1493         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1494         return;
1495 }
1496
1497
1498 /*
1499 ** Routine to adjust the RX EITR value based on traffic;
1500 ** it's a simple three-state model, but it seems to help.
1501 **
1502 ** Note that the three EITR values are tunable via
1503 ** sysctl at run time. The feature can be effectively
1504 ** nullified by setting them all equal.
1505 */
1506 #define BULK_THRESHOLD  10000
1507 #define AVE_THRESHOLD   1600 
1508
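/*
** Worked example with the thresholds above: a ring that moved
** 12000 bytes since the last service while at igb_ave_latency
** crosses BULK_THRESHOLD and steps to igb_bulk_latency; if
** traffic then falls to 800 bytes it steps back to igb_ave_latency
** (below BULK_THRESHOLD), and on the next pass to igb_low_latency
** (below AVE_THRESHOLD).
*/
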
1509 static void
1510 igb_update_aim(struct rx_ring *rxr)
1511 {
1512         struct adapter  *adapter = rxr->adapter;
1513         u32             olditr, newitr;
1514
1515         /* Update interrupt moderation based on traffic */
1516         olditr = rxr->eitr_setting;
1517         newitr = olditr;
1518
1519         /* Idle, don't change setting */
1520         if (rxr->bytes == 0)
1521                 return;
1522
1523         if (olditr == igb_low_latency) {
1524                 if (rxr->bytes > AVE_THRESHOLD)
1525                         newitr = igb_ave_latency;
1526         } else if (olditr == igb_ave_latency) {
1527                 if (rxr->bytes < AVE_THRESHOLD) 
1528                         newitr = igb_low_latency;
1529                 else if (rxr->bytes > BULK_THRESHOLD)
1530                         newitr = igb_bulk_latency;
1531         } else if (olditr == igb_bulk_latency) {
1532                 if (rxr->bytes < BULK_THRESHOLD)
1533                         newitr = igb_ave_latency;
1534         }
1535
1536         if (olditr != newitr) {
1537                 /* Change interrupt rate */
1538                 rxr->eitr_setting = newitr;
1539                 if (adapter->hw.mac.type == e1000_82575)
1540                         newitr |= newitr << 16;
1541                 else
1542                         newitr |= 0x8000000;
1543                 E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me), newitr);
1544         }
1545
1546         rxr->bytes = 0;
1547         return;
1548 }
1549
1550
1551 /*********************************************************************
1552  *
1553  *  Media Ioctl callback
1554  *
1555  *  This routine is called whenever the user queries the status of
1556  *  the interface using ifconfig.
1557  *
1558  **********************************************************************/
1559 static void
1560 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1561 {
1562         struct adapter *adapter = ifp->if_softc;
1563         u_char fiber_type = IFM_1000_SX;
1564
1565         INIT_DEBUGOUT("igb_media_status: begin");
1566
1567         IGB_CORE_LOCK(adapter);
1568         igb_update_link_status(adapter);
1569
1570         ifmr->ifm_status = IFM_AVALID;
1571         ifmr->ifm_active = IFM_ETHER;
1572
1573         if (!adapter->link_active) {
1574                 IGB_CORE_UNLOCK(adapter);
1575                 return;
1576         }
1577
1578         ifmr->ifm_status |= IFM_ACTIVE;
1579
1580         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1581             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1582                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1583         else {
1584                 switch (adapter->link_speed) {
1585                 case 10:
1586                         ifmr->ifm_active |= IFM_10_T;
1587                         break;
1588                 case 100:
1589                         ifmr->ifm_active |= IFM_100_TX;
1590                         break;
1591                 case 1000:
1592                         ifmr->ifm_active |= IFM_1000_T;
1593                         break;
1594                 }
1595                 if (adapter->link_duplex == FULL_DUPLEX)
1596                         ifmr->ifm_active |= IFM_FDX;
1597                 else
1598                         ifmr->ifm_active |= IFM_HDX;
1599         }
1600         IGB_CORE_UNLOCK(adapter);
1601 }
1602
1603 /*********************************************************************
1604  *
1605  *  Media Ioctl callback
1606  *
1607  *  This routine is called when the user changes speed/duplex using
1608  *  the media/mediaopt options with ifconfig.
1609  *
1610  **********************************************************************/
1611 static int
1612 igb_media_change(struct ifnet *ifp)
1613 {
1614         struct adapter *adapter = ifp->if_softc;
1615         struct ifmedia  *ifm = &adapter->media;
1616
1617         INIT_DEBUGOUT("igb_media_change: begin");
1618
1619         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1620                 return (EINVAL);
1621
1622         IGB_CORE_LOCK(adapter);
1623         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1624         case IFM_AUTO:
1625                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1626                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1627                 break;
1628         case IFM_1000_LX:
1629         case IFM_1000_SX:
1630         case IFM_1000_T:
1631                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1632                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1633                 break;
1634         case IFM_100_TX:
1635                 adapter->hw.mac.autoneg = FALSE;
1636                 adapter->hw.phy.autoneg_advertised = 0;
1637                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1638                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1639                 else
1640                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1641                 break;
1642         case IFM_10_T:
1643                 adapter->hw.mac.autoneg = FALSE;
1644                 adapter->hw.phy.autoneg_advertised = 0;
1645                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1646                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1647                 else
1648                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1649                 break;
1650         default:
1651                 device_printf(adapter->dev, "Unsupported media type\n");
1652         }
1653
1654         /* As the speed/duplex settings may have changed we need to
1655          * reset the PHY.
1656          */
1657         adapter->hw.phy.reset_disable = FALSE;
1658
1659         igb_init_locked(adapter);
1660         IGB_CORE_UNLOCK(adapter);
1661
1662         return (0);
1663 }
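
/*
** Example from userland (unit igb0 assumed): forcing 100Mb
** full duplex and later returning to autoselect, both of which
** land in the switch above:
**
**      # ifconfig igb0 media 100baseTX mediaopt full-duplex
**      # ifconfig igb0 media autoselect
*/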
1664
1665
1666 /*********************************************************************
1667  *
1668  *  This routine maps the mbufs to the Advanced TX descriptors
1669  *  used by the 82575 family of adapters.
1670  *
1671  **********************************************************************/
1672
1673 static int
1674 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1675 {
1676         struct adapter          *adapter = txr->adapter;
1677         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1678         bus_dmamap_t            map;
1679         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1680         union e1000_adv_tx_desc *txd = NULL;
1681         struct mbuf             *m_head;
1682         u32                     olinfo_status = 0, cmd_type_len = 0;
1683         int                     nsegs, i, j, error, first, last = 0;
1684         u32                     hdrlen = 0;
1685
1686         m_head = *m_headp;
1687
1688
1689         /* Set basic descriptor constants */
1690         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1691         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1692         if (m_head->m_flags & M_VLANTAG)
1693                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1694
1695         /*
1696          * Force a cleanup if number of TX descriptors
1697          * available hits the threshold
1698          */
1699         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1700                 igb_txeof(txr);
1701                 /* Do we at least have the minimum required? */
1702                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1703                         txr->no_desc_avail++;
1704                         return (ENOBUFS);
1705                 }
1706         }
1707
1708         /*
1709          * Map the packet for DMA.
1710          *
1711          * Capture the first descriptor index,
1712          * this descriptor will have the index
1713          * of the EOP which is the only one that
1714          * now gets a DONE bit writeback.
1715          */
1716         first = txr->next_avail_desc;
1717         tx_buffer = &txr->tx_buffers[first];
1718         tx_buffer_mapped = tx_buffer;
1719         map = tx_buffer->map;
1720
1721         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1722             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1723
1724         if (error == EFBIG) {
1725                 struct mbuf *m;
1726
1727                 m = m_defrag(*m_headp, M_DONTWAIT);
1728                 if (m == NULL) {
1729                         adapter->mbuf_defrag_failed++;
1730                         m_freem(*m_headp);
1731                         *m_headp = NULL;
1732                         return (ENOBUFS);
1733                 }
1734                 *m_headp = m;
1735
1736                 /* Try it again */
1737                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1738                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1739
1740                 if (error == ENOMEM) {
1741                         adapter->no_tx_dma_setup++;
1742                         return (error);
1743                 } else if (error != 0) {
1744                         adapter->no_tx_dma_setup++;
1745                         m_freem(*m_headp);
1746                         *m_headp = NULL;
1747                         return (error);
1748                 }
1749         } else if (error == ENOMEM) {
1750                 adapter->no_tx_dma_setup++;
1751                 return (error);
1752         } else if (error != 0) {
1753                 adapter->no_tx_dma_setup++;
1754                 m_freem(*m_headp);
1755                 *m_headp = NULL;
1756                 return (error);
1757         }
1758
1759         /* Check again to be sure we have enough descriptors */
1760         if (nsegs > (txr->tx_avail - 2)) {
1761                 txr->no_desc_avail++;
1762                 bus_dmamap_unload(txr->txtag, map);
1763                 return (ENOBUFS);
1764         }
1765         m_head = *m_headp;
1766
1767         /*
1768          * Set up the context descriptor:
1769          * used when any hardware offload is done.
1770          * This includes CSUM, VLAN, and TSO. It
1771          * will use the first descriptor.
1772          */
1773         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1774                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1775                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1776                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1777                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1778                 } else
1779                         return (ENXIO); 
1780         } else if (igb_tx_ctx_setup(txr, m_head))
1781                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1782
1783 #ifdef IGB_IEEE1588
1784         /* This is changing soon to an mtag detection; placeholder: */
1785         /* if (the mbuf carries a TSTAMP mtag)
1786                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP; */
1787 #endif
1788         /* Calculate payload length */
1789         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1790             << E1000_ADVTXD_PAYLEN_SHIFT);
1791
1792         /* Set up our transmit descriptors */
1793         i = txr->next_avail_desc;
1794         for (j = 0; j < nsegs; j++) {
1795                 bus_size_t seg_len;
1796                 bus_addr_t seg_addr;
1797
1798                 tx_buffer = &txr->tx_buffers[i];
1799                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1800                 seg_addr = segs[j].ds_addr;
1801                 seg_len  = segs[j].ds_len;
1802
1803                 txd->read.buffer_addr = htole64(seg_addr);
1804                 txd->read.cmd_type_len = htole32(
1805                     adapter->txd_cmd | cmd_type_len | seg_len);
1806                 txd->read.olinfo_status = htole32(olinfo_status);
1807                 last = i;
1808                 if (++i == adapter->num_tx_desc)
1809                         i = 0;
1810                 tx_buffer->m_head = NULL;
1811                 tx_buffer->next_eop = -1;
1812         }
1813
1814         txr->next_avail_desc = i;
1815         txr->tx_avail -= nsegs;
1816
1817         tx_buffer->m_head = m_head;
1818         tx_buffer_mapped->map = tx_buffer->map;
1819         tx_buffer->map = map;
1820         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1821
1822         /*
1823          * Last Descriptor of Packet
1824          * needs End Of Packet (EOP)
1825          * and Report Status (RS)
1826          */
1827         txd->read.cmd_type_len |=
1828             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1829         /*
1830          * Keep track in the first buffer which
1831          * descriptor will be written back
1832          */
1833         tx_buffer = &txr->tx_buffers[first];
1834         tx_buffer->next_eop = last;
1835
1836         /*
1837          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1838          * that this frame is available to transmit.
1839          */
1840         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1841             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1842         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1843         ++txr->tx_packets;
1844
1845         return (0);
1846
1847 }
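
/*
** Descriptor accounting example: a frame that maps to three DMA
** segments starting at next_avail_desc = 10 consumes descriptors
** 10 through 12; only descriptor 12 gets EOP|RS set, and
** tx_buffers[10].next_eop records 12 so that igb_txeof() knows
** which descriptor to watch for the DD writeback.
*/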
1848
1849 static void
1850 igb_set_promisc(struct adapter *adapter)
1851 {
1852         struct ifnet    *ifp = adapter->ifp;
1853         uint32_t        reg_rctl;
1854
1855         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1856
1857         if (ifp->if_flags & IFF_PROMISC) {
1858                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1859                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1860         } else if (ifp->if_flags & IFF_ALLMULTI) {
1861                 reg_rctl |= E1000_RCTL_MPE;
1862                 reg_rctl &= ~E1000_RCTL_UPE;
1863                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1864         }
1865 }
1866
1867 static void
1868 igb_disable_promisc(struct adapter *adapter)
1869 {
1870         uint32_t        reg_rctl;
1871
1872         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1873
1874         reg_rctl &=  (~E1000_RCTL_UPE);
1875         reg_rctl &=  (~E1000_RCTL_MPE);
1876         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1877 }
1878
1879
1880 /*********************************************************************
1881  *  Multicast Update
1882  *
1883  *  This routine is called whenever multicast address list is updated.
1884  *
1885  **********************************************************************/
1886
1887 static void
1888 igb_set_multi(struct adapter *adapter)
1889 {
1890         struct ifnet    *ifp = adapter->ifp;
1891         struct ifmultiaddr *ifma;
1892         u32 reg_rctl = 0;
1893         u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1894
1895         int mcnt = 0;
1896
1897         IOCTL_DEBUGOUT("igb_set_multi: begin");
1898
1899         if_maddr_rlock(ifp);
1900         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1901                 if (ifma->ifma_addr->sa_family != AF_LINK)
1902                         continue;
1903
1904                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1905                         break;
1906
1907                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1908                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1909                 mcnt++;
1910         }
1911         if_maddr_runlock(ifp);
1912
1913         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1914                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1915                 reg_rctl |= E1000_RCTL_MPE;
1916                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1917         } else
1918                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1919 }
1920
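/*
** The mta[] table built above is simply the 6-byte hardware
** addresses packed back to back, entry i at &mta[i * ETH_ADDR_LEN],
** which is the flat layout e1000_update_mc_addr_list() expects.
*/
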
1921
1922 /*********************************************************************
1923  *  Timer routine
1924  *
1925  *  This routine checks for link status and updates statistics.
1926  *
1927  **********************************************************************/
1928
1929 static void
1930 igb_local_timer(void *arg)
1931 {
1932         struct adapter  *adapter = arg;
1933         struct ifnet    *ifp = adapter->ifp;
1934
1935         IGB_CORE_LOCK_ASSERT(adapter);
1936
1937         igb_update_link_status(adapter);
1938         igb_update_stats_counters(adapter);
1939
1940         if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1941                 igb_print_hw_stats(adapter);
1942
1943         /*
1944          * Each second we check the watchdog to 
1945          * protect against hardware hangs.
1946          */
1947         igb_watchdog(adapter);
1948
1949         /* Trigger an RX interrupt on all queues */
1950         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1951  
1952         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1953
1954 }
1955
1956 static void
1957 igb_update_link_status(struct adapter *adapter)
1958 {
1959         struct e1000_hw *hw = &adapter->hw;
1960         struct ifnet *ifp = adapter->ifp;
1961         device_t dev = adapter->dev;
1962         struct tx_ring *txr = adapter->tx_rings;
1963         u32 link_check = 0;
1964
1965         /* Get the cached link value or read for real */
1966         switch (hw->phy.media_type) {
1967         case e1000_media_type_copper:
1968                 if (hw->mac.get_link_status) {
1969                         /* Do the work to read phy */
1970                         e1000_check_for_link(hw);
1971                         link_check = !hw->mac.get_link_status;
1972                 } else
1973                         link_check = TRUE;
1974                 break;
1975         case e1000_media_type_fiber:
1976                 e1000_check_for_link(hw);
1977                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1978                                  E1000_STATUS_LU);
1979                 break;
1980         case e1000_media_type_internal_serdes:
1981                 e1000_check_for_link(hw);
1982                 link_check = adapter->hw.mac.serdes_has_link;
1983                 break;
1984         default:
1985         case e1000_media_type_unknown:
1986                 break;
1987         }
1988
1989         /* Now we check if a transition has happened */
1990         if (link_check && (adapter->link_active == 0)) {
1991                 e1000_get_speed_and_duplex(&adapter->hw, 
1992                     &adapter->link_speed, &adapter->link_duplex);
1993                 if (bootverbose)
1994                         device_printf(dev, "Link is up %d Mbps %s\n",
1995                             adapter->link_speed,
1996                             ((adapter->link_duplex == FULL_DUPLEX) ?
1997                             "Full Duplex" : "Half Duplex"));
1998                 adapter->link_active = 1;
1999                 ifp->if_baudrate = adapter->link_speed * 1000000;
2000                 if_link_state_change(ifp, LINK_STATE_UP);
2001         } else if (!link_check && (adapter->link_active == 1)) {
2002                 ifp->if_baudrate = adapter->link_speed = 0;
2003                 adapter->link_duplex = 0;
2004                 if (bootverbose)
2005                         device_printf(dev, "Link is Down\n");
2006                 adapter->link_active = 0;
2007                 if_link_state_change(ifp, LINK_STATE_DOWN);
2008                 /* Turn off watchdogs */
2009                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2010                         txr->watchdog_timer = FALSE;
2011         }
2012 }
2013
2014 /*********************************************************************
2015  *
2016  *  This routine disables all traffic on the adapter by issuing a
2017  *  global reset on the MAC and deallocates TX/RX buffers.
2018  *
2019  **********************************************************************/
2020
2021 static void
2022 igb_stop(void *arg)
2023 {
2024         struct adapter  *adapter = arg;
2025         struct ifnet    *ifp = adapter->ifp;
2026
2027         IGB_CORE_LOCK_ASSERT(adapter);
2028
2029         INIT_DEBUGOUT("igb_stop: begin");
2030
2031         igb_disable_intr(adapter);
2032
2033         callout_stop(&adapter->timer);
2034
2035         /* Tell the stack that the interface is no longer active */
2036         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2037
2038         e1000_reset_hw(&adapter->hw);
2039         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2040 }
2041
2042
2043 /*********************************************************************
2044  *
2045  *  Determine hardware revision.
2046  *
2047  **********************************************************************/
2048 static void
2049 igb_identify_hardware(struct adapter *adapter)
2050 {
2051         device_t dev = adapter->dev;
2052
2053         /* Make sure our PCI config space has the necessary stuff set */
2054         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2055         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2056             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2057                 device_printf(dev, "Memory Access and/or Bus Master bits "
2058                     "were not set!\n");
2059                 adapter->hw.bus.pci_cmd_word |=
2060                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2061                 pci_write_config(dev, PCIR_COMMAND,
2062                     adapter->hw.bus.pci_cmd_word, 2);
2063         }
2064
2065         /* Save off the information about this board */
2066         adapter->hw.vendor_id = pci_get_vendor(dev);
2067         adapter->hw.device_id = pci_get_device(dev);
2068         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2069         adapter->hw.subsystem_vendor_id =
2070             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2071         adapter->hw.subsystem_device_id =
2072             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2073
2074         /* Do Shared Code Init and Setup */
2075         if (e1000_set_mac_type(&adapter->hw)) {
2076                 device_printf(dev, "Setup init failure\n");
2077                 return;
2078         }
2079 }
2080
2081 static int
2082 igb_allocate_pci_resources(struct adapter *adapter)
2083 {
2084         device_t        dev = adapter->dev;
2085         int             rid;
2086
2087         rid = PCIR_BAR(0);
2088         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2089             &rid, RF_ACTIVE);
2090         if (adapter->pci_mem == NULL) {
2091                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2092                 return (ENXIO);
2093         }
2094         adapter->osdep.mem_bus_space_tag =
2095             rman_get_bustag(adapter->pci_mem);
2096         adapter->osdep.mem_bus_space_handle =
2097             rman_get_bushandle(adapter->pci_mem);
2098         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2099
2100         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2101
2102         /* This will setup either MSI/X or MSI */
2103         adapter->msix = igb_setup_msix(adapter);
2104         adapter->hw.back = &adapter->osdep;
2105
2106         return (0);
2107 }
2108
2109 /*********************************************************************
2110  *
2111  *  Setup the Legacy or MSI Interrupt handler
2112  *
2113  **********************************************************************/
2114 static int
2115 igb_allocate_legacy(struct adapter *adapter)
2116 {
2117         device_t dev = adapter->dev;
2118         int error, rid = 0;
2119
2120         /* Turn off all interrupts */
2121         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2122
2123         /* MSI RID is 1 */
2124         if (adapter->msix == 1)
2125                 rid = 1;
2126
2127         /* We allocate a single interrupt resource */
2128         adapter->res = bus_alloc_resource_any(dev,
2129             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2130         if (adapter->res == NULL) {
2131                 device_printf(dev, "Unable to allocate bus resource: "
2132                     "interrupt\n");
2133                 return (ENXIO);
2134         }
2135
2136         /*
2137          * Try allocating a fast interrupt and the associated deferred
2138          * processing contexts.
2139          */
2140         TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2141         adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2142             taskqueue_thread_enqueue, &adapter->tq);
2143         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2144             device_get_nameunit(adapter->dev));
2145         if ((error = bus_setup_intr(dev, adapter->res,
2146             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2147             adapter, &adapter->tag)) != 0) {
2148                 device_printf(dev, "Failed to register fast interrupt "
2149                             "handler: %d\n", error);
2150                 taskqueue_free(adapter->tq);
2151                 adapter->tq = NULL;
2152                 return (error);
2153         }
2154
2155         return (0);
2156 }
2157
2158
2159 /*********************************************************************
2160  *
2161  *  Set up the MSIX Interrupt handlers
2162  *
2163  **********************************************************************/
2164 static int
2165 igb_allocate_msix(struct adapter *adapter)
2166 {
2167         device_t dev = adapter->dev;
2168         struct tx_ring *txr = adapter->tx_rings;
2169         struct rx_ring *rxr = adapter->rx_rings;
2170         int error, rid, vector = 0;
2171
2172         /*
2173          * Setup the interrupt handlers
2174          */
2175
2176         /* TX Setup */
2177         for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
2178                 rid = vector + 1;
2179                 txr->res = bus_alloc_resource_any(dev,
2180                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2181                 if (txr->res == NULL) {
2182                         device_printf(dev,
2183                             "Unable to allocate bus resource: "
2184                             "MSIX TX Interrupt\n");
2185                         return (ENXIO);
2186                 }
2187                 error = bus_setup_intr(dev, txr->res,
2188                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2189                     igb_msix_tx, txr, &txr->tag);
2190                 if (error) {
2191                         txr->res = NULL;
2192                         device_printf(dev, "Failed to register TX handler\n");
2193                         return (error);
2194                 }
2195                 /* Make tasklet for deferred handling - one per queue */
2196                 TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2197                 txr->msix = vector;
2198                 if (adapter->hw.mac.type == e1000_82575)
2199                         txr->eims = E1000_EICR_TX_QUEUE0 << i;
2200                 else
2201                         txr->eims = 1 << vector;
2202                 /*
2203                 ** Bind the msix vector, and thus the
2204                 ** ring to the corresponding cpu.
2205                 */
2206                 if (adapter->num_queues > 1)
2207                         bus_bind_intr(dev, txr->res, i);
2208         }
2209
2210         /* RX Setup */
2211         for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) {
2212                 rid = vector + 1;
2213                 rxr->res = bus_alloc_resource_any(dev,
2214                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2215                 if (rxr->res == NULL) {
2216                         device_printf(dev,
2217                             "Unable to allocate bus resource: "
2218                             "MSIX RX Interrupt\n");
2219                         return (ENXIO);
2220                 }
2221                 error = bus_setup_intr(dev, rxr->res,
2222                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2223                     igb_msix_rx, rxr, &rxr->tag);
2224                 if (error) {
2225                         rxr->res = NULL;
2226                         device_printf(dev, "Failed to register RX handler\n");
2227                         return (error);
2228                 }
2229                 /* Make tasklet for deferred handling - one per queue */
2230                 TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2231                 rxr->msix = vector;
2232                 if (adapter->hw.mac.type == e1000_82575)
2233                         rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2234                 else
2235                         rxr->eims = 1 << vector;
2236                 /* Get a mask for local timer */
2237                 adapter->rx_mask |= rxr->eims;
2238                 /*
2239                 ** Bind the msix vector, and thus the
2240                 ** ring to the corresponding cpu.
2241                 ** Notice that this makes an RX/TX pair
2242                 ** bound to each CPU, limited by the MSIX
2243                 ** vectors.
2244                 */
2245                 if (adapter->num_queues > 1)
2246                         bus_bind_intr(dev, rxr->res, i);
2247         }
2248
2249         /* And Link */
2250         rid = vector + 1;
2251         adapter->res = bus_alloc_resource_any(dev,
2252             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2253         if (adapter->res == NULL) {
2254                 device_printf(dev,
2255                     "Unable to allocate bus resource: "
2256                     "MSIX Link Interrupt\n");
2257                 return (ENXIO);
2258         }
2259         if ((error = bus_setup_intr(dev, adapter->res,
2260             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2261             igb_msix_link, adapter, &adapter->tag)) != 0) {
2262                 device_printf(dev, "Failed to register Link handler\n");
2263                 return (error);
2264         }
2265         adapter->linkvec = vector;
2266         adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2267             taskqueue_thread_enqueue, &adapter->tq);
2268         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2269             device_get_nameunit(adapter->dev));
2270
2271         return (0);
2272 }
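
/*
** Resulting vector layout with, say, num_queues = 2: vectors 0-1
** service the TX queues, vectors 2-3 the RX queues, and vector 4
** the link interrupt, so IRQ rids 1 through 5 are allocated above.
*/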
2273
2274
2275 static void
2276 igb_configure_queues(struct adapter *adapter)
2277 {
2278         struct  e1000_hw *hw = &adapter->hw;
2279         struct  tx_ring *txr;
2280         struct  rx_ring *rxr;
2281
2282         /* Turn on MSIX */
2283         /*
2284         ** 82576 uses IVARs to route MSI/X
2285         ** interrupts; it's not very intuitive, so
2286         ** study the code carefully :)
2287         */
2288         if (adapter->hw.mac.type == e1000_82576) {
2289                 u32     ivar = 0;
2290                 /* First turn on the capability */
2291                 E1000_WRITE_REG(hw, E1000_GPIE,
2292                     E1000_GPIE_MSIX_MODE |
2293                     E1000_GPIE_EIAME |
2294                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2295                 /* RX */
2296                 for (int i = 0; i < adapter->num_queues; i++) {
2297                         u32 index = i & 0x7; /* Each IVAR has two entries */
2298                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2299                         rxr = &adapter->rx_rings[i];
2300                         if (i < 8) {
2301                                 ivar &= 0xFFFFFF00;
2302                                 ivar |= rxr->msix | E1000_IVAR_VALID;
2303                         } else {
2304                                 ivar &= 0xFF00FFFF;
2305                                 ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2306                         }
2307                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2308                         adapter->eims_mask |= rxr->eims;
2309                 }
2310                 /* TX */
2311                 for (int i = 0; i < adapter->num_queues; i++) {
2312                         u32 index = i & 0x7; /* Each IVAR has two entries */
2313                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2314                         txr = &adapter->tx_rings[i];
2315                         if (i < 8) {
2316                                 ivar &= 0xFFFF00FF;
2317                                 ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2318                         } else {
2319                                 ivar &= 0x00FFFFFF;
2320                                 ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2321                         }
2322                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2323                         adapter->eims_mask |= txr->eims;
2324                 }
2325
2326                 /* And for the link interrupt */
2327                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2328                 adapter->link_mask = 1 << adapter->linkvec;
2329                 adapter->eims_mask |= adapter->link_mask;
2330                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2331         } else {
2332                 /* 82575 */
2333                 int tmp;
2334
2335                 /* Enable MSI-X PBA support */
2336                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2337                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2338                 /* Auto-Mask interrupts upon ICR read. */
2339                 tmp |= E1000_CTRL_EXT_EIAME;
2340                 tmp |= E1000_CTRL_EXT_IRCA;
2341                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2342
2343                 /* TX */
2344                 for (int i = 0; i < adapter->num_queues; i++) {
2345                         txr = &adapter->tx_rings[i];
2346                         E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2347                             txr->eims);
2348                         adapter->eims_mask |= txr->eims;
2349                 }
2350
2351                 /* RX */
2352                 for (int i = 0; i < adapter->num_queues; i++) {
2353                         rxr = &adapter->rx_rings[i];
2354                         E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2355                             rxr->eims);
2356                         adapter->eims_mask |= rxr->eims;
2357                 }
2358
2359                 /* Link */
2360                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2361                     E1000_EIMS_OTHER);
2362                 adapter->link_mask |= E1000_EIMS_OTHER;
2363                 adapter->eims_mask |= adapter->link_mask;
2364         }
2365         return;
2366 }
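
/*
** 82576 IVAR packing example (vector numbering as assigned in
** igb_allocate_msix() with num_queues = 2): queue 0 uses TX
** vector 0 and RX vector 2, so IVAR0 index 0 is written as
**
**      byte 0 = 0x82   RX entry: vector 2 | E1000_IVAR_VALID
**      byte 1 = 0x80   TX entry: vector 0 | E1000_IVAR_VALID
**
** and queues 8-15, when present, occupy the two upper bytes.
*/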
2367
2368
2369 static void
2370 igb_free_pci_resources(struct adapter *adapter)
2371 {
2372         struct          tx_ring *txr = adapter->tx_rings;
2373         struct          rx_ring *rxr = adapter->rx_rings;
2374         device_t        dev = adapter->dev;
2375         int             rid;
2376
2377         /*
2378         ** There is a slight possibility of a failure mode
2379         ** in attach that will result in entering this function
2380         ** before interrupt resources have been initialized, and
2381         ** in that case we do not want to execute the loops below.
2382         ** We can detect this reliably from the state of the adapter's
2383         ** res pointer.
2384         */
2385         if (adapter->res == NULL)
2386                 goto mem;
2387
2388         /*
2389          * First release all the TX/RX interrupt resources:
2390          */
2391         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2392                 rid = txr->msix + 1;
2393                 if (txr->tag != NULL) {
2394                         bus_teardown_intr(dev, txr->res, txr->tag);
2395                         txr->tag = NULL;
2396                 }
2397                 if (txr->res != NULL)
2398                         bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res);
2399         }
2400
2401         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
2402                 rid = rxr->msix + 1;
2403                 if (rxr->tag != NULL) {
2404                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2405                         rxr->tag = NULL;
2406                 }
2407                 if (rxr->res != NULL)
2408                         bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res);
2409         }
2410
2411         /* Clean the Legacy or Link interrupt last */
2412         if (adapter->linkvec) /* we are doing MSIX */
2413                 rid = adapter->linkvec + 1;
2414         else
2415                 rid = (adapter->msix != 0) ? 1 : 0;
2416
2417         if (adapter->tag != NULL) {
2418                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2419                 adapter->tag = NULL;
2420         }
2421         if (adapter->res != NULL)
2422                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2423
2424 mem:
2425         if (adapter->msix)
2426                 pci_release_msi(dev);
2427
2428         if (adapter->msix_mem != NULL)
2429                 bus_release_resource(dev, SYS_RES_MEMORY,
2430                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2431
2432         if (adapter->pci_mem != NULL)
2433                 bus_release_resource(dev, SYS_RES_MEMORY,
2434                     PCIR_BAR(0), adapter->pci_mem);
2435
2436 }
2437
2438 /*
2439  * Setup Either MSI/X or MSI
2440  */
2441 static int
2442 igb_setup_msix(struct adapter *adapter)
2443 {
2444         device_t dev = adapter->dev;
2445         int rid, want, queues, msgs;
2446
2447         /* First try MSI/X */
2448         rid = PCIR_BAR(IGB_MSIX_BAR);
2449         adapter->msix_mem = bus_alloc_resource_any(dev,
2450             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2451         if (!adapter->msix_mem) {
2452                 /* May not be enabled */
2453                 device_printf(adapter->dev,
2454                     "Unable to map MSIX table\n");
2455                 goto msi;
2456         }
2457
2458         msgs = pci_msix_count(dev); 
2459         if (msgs == 0) { /* system has msix disabled */
2460                 bus_release_resource(dev, SYS_RES_MEMORY,
2461                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2462                 adapter->msix_mem = NULL;
2463                 goto msi;
2464         }
2465
2466         /* Figure out a reasonable auto config value */
2467         queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2468
2469         if (igb_num_queues == 0)
2470                 igb_num_queues = queues;
2471         /*
2472         ** Two vectors (RX/TX pair) per queue
2473         ** plus an additional for Link interrupt
2474         */
2475         want = (igb_num_queues * 2) + 1;
2476         if (msgs >= want)
2477                 msgs = want;
2478         else {
2479                 device_printf(adapter->dev,
2480                     "MSIX Configuration Problem, "
2481                     "%d vectors available, but %d vectors wanted!\n",
2482                     msgs, want);
2483                 return (ENXIO);
2484         }
2485         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2486                 device_printf(adapter->dev,
2487                     "Using MSIX interrupts with %d vectors\n", msgs);
2488                 adapter->num_queues = igb_num_queues;
2489                 return (msgs);
2490         }
2491 msi:
2492         msgs = pci_msi_count(dev);
2493         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2494                 device_printf(adapter->dev,"Using MSI interrupt\n");
2495         return (msgs);
2496 }
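
/*
** Auto-configuration example: a 4-core system whose device exposes
** 10 MSIX messages computes queues = min(mp_ncpus, (10 - 1) / 2) = 4
** and want = 4 * 2 + 1 = 9, which fits, so nine vectors are
** allocated: a TX/RX pair per queue plus the link vector.
*/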
2497
2498 /*********************************************************************
2499  *
2500  *  Initialize the hardware to a configuration
2501  *  as specified by the adapter structure.
2502  *
2503  **********************************************************************/
2504 static int
2505 igb_hardware_init(struct adapter *adapter)
2506 {
2507         device_t        dev = adapter->dev;
2508         u32             rx_buffer_size;
2509
2510         INIT_DEBUGOUT("igb_hardware_init: begin");
2511
2512         /* Issue a global reset */
2513         e1000_reset_hw(&adapter->hw);
2514
2515         /* Let the firmware know the OS is in control */
2516         igb_get_hw_control(adapter);
2517
2518         /*
2519          * These parameters control the automatic generation (Tx) and
2520          * response (Rx) to Ethernet PAUSE frames.
2521          * - High water mark should allow for at least two frames to be
2522          *   received after sending an XOFF.
2523          * - Low water mark works best when it is very near the high water mark.
2524          *   This allows the receiver to restart by sending XON when it has
2525          *   drained a bit. Here we use an arbitrary value of 1500 which will
2526          *   restart after one full frame is pulled from the buffer. There
2527          *   could be several smaller frames in the buffer and if so they will
2528          *   not trigger the XON until their total number reduces the buffer
2529          *   by 1500.
2530          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2531          */
2532         if (adapter->hw.mac.type == e1000_82576)
2533                 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2534                     E1000_RXPBS) & 0xffff) << 10 );
2535         else
2536                 rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2537                     E1000_PBA) & 0xffff) << 10 );
2538
2539         adapter->hw.fc.high_water = rx_buffer_size -
2540             roundup2(adapter->max_frame_size, 1024);
2541         adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2542
2543         adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2544         adapter->hw.fc.send_xon = TRUE;
2545
2546         /* Set Flow control, use the tunable location if sane */
2547         if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2548                 adapter->hw.fc.requested_mode = igb_fc_setting;
2549         else
2550                 adapter->hw.fc.requested_mode = e1000_fc_none;
2551
2552         if (e1000_init_hw(&adapter->hw) < 0) {
2553                 device_printf(dev, "Hardware Initialization Failed\n");
2554                 return (EIO);
2555         }
2556
2557         e1000_check_for_link(&adapter->hw);
2558
2559         return (0);
2560 }
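
/*
** Watermark arithmetic example (assuming the standard 1500-byte
** MTU, so max_frame_size = 1518, and a 64KB RX packet buffer):
** rx_buffer_size = 64 * 1024 = 65536, high_water = 65536 -
** roundup2(1518, 1024) = 63488, low_water = 63488 - 1500 = 61988.
*/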
2561
2562 /*********************************************************************
2563  *
2564  *  Setup networking device structure and register an interface.
2565  *
2566  **********************************************************************/
2567 static void
2568 igb_setup_interface(device_t dev, struct adapter *adapter)
2569 {
2570         struct ifnet   *ifp;
2571
2572         INIT_DEBUGOUT("igb_setup_interface: begin");
2573
2574         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2575         if (ifp == NULL)
2576                 panic("%s: cannot if_alloc()", device_get_nameunit(dev));
2577         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2578         ifp->if_mtu = ETHERMTU;
2579         ifp->if_init =  igb_init;
2580         ifp->if_softc = adapter;
2581         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2582         ifp->if_ioctl = igb_ioctl;
2583         ifp->if_start = igb_start;
2584 #if __FreeBSD_version >= 800000
2585         ifp->if_transmit = igb_mq_start;
2586         ifp->if_qflush = igb_qflush;
2587 #endif
2588         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2589         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2590         IFQ_SET_READY(&ifp->if_snd);
2591
2592         ether_ifattach(ifp, adapter->hw.mac.addr);
2593
2594         ifp->if_capabilities = ifp->if_capenable = 0;
2595
2596         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2597         ifp->if_capabilities |= IFCAP_TSO4;
2598         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2599         ifp->if_capenable = ifp->if_capabilities;
2600
2601         /*
2602          * Tell the upper layer(s) we support long frames.
2603          */
2604         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2605         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2606         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2607
2608         /*
2609          * Specify the media types supported by this adapter and register
2610          * callbacks to update media and link information
2611          */
2612         ifmedia_init(&adapter->media, IFM_IMASK,
2613             igb_media_change, igb_media_status);
2614         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2615             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2616                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2617                             0, NULL);
2618                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2619         } else {
2620                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2621                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2622                             0, NULL);
2623                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2624                             0, NULL);
2625                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2626                             0, NULL);
2627                 if (adapter->hw.phy.type != e1000_phy_ife) {
2628                         ifmedia_add(&adapter->media,
2629                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2630                         ifmedia_add(&adapter->media,
2631                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2632                 }
2633         }
2634         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2635         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2636 }
2637
2638
2639 /*
2640  * Manage DMA'able memory.
2641  */
2642 static void
2643 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2644 {
2645         if (error)
2646                 return;
2647         *(bus_addr_t *) arg = segs[0].ds_addr;
2648 }
2649
2650 static int
2651 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2652         struct igb_dma_alloc *dma, int mapflags)
2653 {
2654         int error;
2655
2656         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2657                                 1, 0,                   /* alignment, bounds */
2658                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2659                                 BUS_SPACE_MAXADDR,      /* highaddr */
2660                                 NULL, NULL,             /* filter, filterarg */
2661                                 size,                   /* maxsize */
2662                                 1,                      /* nsegments */
2663                                 size,                   /* maxsegsize */
2664                                 0,                      /* flags */
2665                                 NULL,                   /* lockfunc */
2666                                 NULL,                   /* lockarg */
2667                                 &dma->dma_tag);
2668         if (error) {
2669                 device_printf(adapter->dev,
2670                     "%s: bus_dma_tag_create failed: %d\n",
2671                     __func__, error);
2672                 goto fail_0;
2673         }
2674
2675         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2676             BUS_DMA_NOWAIT, &dma->dma_map);
2677         if (error) {
2678                 device_printf(adapter->dev,
2679                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2680                     __func__, (uintmax_t)size, error);
2681                 goto fail_1;    /* no memory to free, just destroy the tag */
2682         }
2683
2684         dma->dma_paddr = 0;
2685         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2686             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2687         if (error || dma->dma_paddr == 0) {
2688                 device_printf(adapter->dev,
2689                     "%s: bus_dmamap_load failed: %d\n",
2690                     __func__, error);
2691                 goto fail_3;
2692         }
2693
2694         return (0);
2695
2696 fail_3:
2697         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2698         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2699 fail_1:
2700         bus_dma_tag_destroy(dma->dma_tag);
2701 fail_0:
2702         dma->dma_map = NULL;
2703         dma->dma_tag = NULL;
2704
2705         return (error);
2706 }
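
/*
** Usage sketch (a hypothetical scratch buffer, not code from this
** driver; the ring setup in igb_allocate_queues() below follows
** exactly this pattern):
**
**      struct igb_dma_alloc dma;
**      if (igb_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) == 0) {
**              ... device uses dma.dma_paddr, CPU uses dma.dma_vaddr ...
**              igb_dma_free(adapter, &dma);
**      }
*/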
2707
2708 static void
2709 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2710 {
2711         if (dma->dma_tag == NULL)
2712                 return;
2713         if (dma->dma_map != NULL) {
2714                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2715                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2716                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2717                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2718                 dma->dma_map = NULL;
2719         }
2720         bus_dma_tag_destroy(dma->dma_tag);
2721         dma->dma_tag = NULL;
2722 }
2723
2724
2725 /*********************************************************************
2726  *
2727  *  Allocate memory for the transmit and receive rings, and then
2728  *  the descriptors associated with each, called only once at attach.
2729  *
2730  **********************************************************************/
2731 static int
2732 igb_allocate_queues(struct adapter *adapter)
2733 {
2734         device_t dev = adapter->dev;
2735         struct tx_ring *txr;
2736         struct rx_ring *rxr;
2737         int rsize, tsize, error = E1000_SUCCESS;
2738         int txconf = 0, rxconf = 0;
2739
2740         /* First allocate the TX ring struct memory */
2741         if (!(adapter->tx_rings =
2742             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2743             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2744                 device_printf(dev, "Unable to allocate TX ring memory\n");
2745                 error = ENOMEM;
2746                 goto fail;
2747         }
2748         txr = adapter->tx_rings;
2749
2750         /* Next allocate the RX */
2751         if (!(adapter->rx_rings =
2752             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2753             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2754                 device_printf(dev, "Unable to allocate RX ring memory\n");
2755                 error = ENOMEM;
2756                 goto rx_fail;
2757         }
2758         rxr = adapter->rx_rings;
2759
2760         tsize = roundup2(adapter->num_tx_desc *
2761             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2762         /*
2763          * Now set up the TX queues, txconf is needed to handle the
2764          * possibility that things fail midcourse and we need to
2765          * undo memory gracefully
2766          */ 
2767         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2768                 /* Set up some basics */
2769                 txr = &adapter->tx_rings[i];
2770                 txr->adapter = adapter;
2771                 txr->me = i;
2772
2773                 /* Initialize the TX lock */
2774                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2775                     device_get_nameunit(dev), txr->me);
2776                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2777
2778                 if (igb_dma_malloc(adapter, tsize,
2779                         &txr->txdma, BUS_DMA_NOWAIT)) {
2780                         device_printf(dev,
2781                             "Unable to allocate TX Descriptor memory\n");
2782                         error = ENOMEM;
2783                         goto err_tx_desc;
2784                 }
2785                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2786                 bzero((void *)txr->tx_base, tsize);
2787
2788                 /* Now allocate transmit buffers for the ring */
2789                 if (igb_allocate_transmit_buffers(txr)) {
2790                         device_printf(dev,
2791                             "Critical Failure setting up transmit buffers\n");
2792                         error = ENOMEM;
2793                         goto err_tx_desc;
2794                 }
2795 #if __FreeBSD_version >= 800000
2796                 /* Allocate a buf ring */
2797                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2798                     M_WAITOK, &txr->tx_mtx);
2799 #endif
2800         }
2801
2802         /*
2803          * Next the RX queues...
2804          */ 
2805         rsize = roundup2(adapter->num_rx_desc *
2806             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2807         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2808                 rxr = &adapter->rx_rings[i];
2809                 rxr->adapter = adapter;
2810                 rxr->me = i;
2811
2812                 /* Initialize the RX lock */
2813                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2814                     device_get_nameunit(dev), rxr->me);
2815                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2816
2817                 if (igb_dma_malloc(adapter, rsize,
2818                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2819                         device_printf(dev,
2820                             "Unable to allocate RX Descriptor memory\n");
2821                         error = ENOMEM;
2822                         goto err_rx_desc;
2823                 }
2824                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2825                 bzero((void *)rxr->rx_base, rsize);
2826
2827                 /* Allocate receive buffers for the ring */
2828                 if (igb_allocate_receive_buffers(rxr)) {
2829                         device_printf(dev,
2830                             "Critical Failure setting up receive buffers\n");
2831                         error = ENOMEM;
2832                         goto err_rx_desc;
2833                 }
2834         }
2835
2836         return (0);
2837
2838 err_rx_desc:
2839         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2840                 igb_dma_free(adapter, &rxr->rxdma);
2841 err_tx_desc:
2842         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2843                 igb_dma_free(adapter, &txr->txdma);
2844         free(adapter->rx_rings, M_DEVBUF);
2845 rx_fail:
2846         free(adapter->tx_rings, M_DEVBUF);
2847 fail:
2848         return (error);
2849 }
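
/*
** Sizing example: assuming the default of 256 descriptors per ring
** and 16-byte advanced descriptors, tsize = rsize =
** roundup2(256 * 16, IGB_DBA_ALIGN) = 4096 bytes per ring.
*/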
2850
2851 /*********************************************************************
2852  *
2853  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2854  *  the information needed to transmit a packet on the wire. This is
2855  *  called only once at attach, setup is done every reset.
2856  *
2857  **********************************************************************/
2858 static int
2859 igb_allocate_transmit_buffers(struct tx_ring *txr)
2860 {
2861         struct adapter *adapter = txr->adapter;
2862         device_t dev = adapter->dev;
2863         struct igb_tx_buffer *txbuf;
2864         int error, i;
2865
2866         /*
2867          * Setup DMA descriptor areas.
2868          */
2869         if ((error = bus_dma_tag_create(NULL,           /* parent */
2870                                1, 0,                    /* alignment, bounds */
2871                                BUS_SPACE_MAXADDR,       /* lowaddr */
2872                                BUS_SPACE_MAXADDR,       /* highaddr */
2873                                NULL, NULL,              /* filter, filterarg */
2874                                IGB_TSO_SIZE,            /* maxsize */
2875                                IGB_MAX_SCATTER,         /* nsegments */
2876                                PAGE_SIZE,               /* maxsegsize */
2877                                0,                       /* flags */
2878                                NULL,                    /* lockfunc */
2879                                NULL,                    /* lockfuncarg */
2880                                &txr->txtag))) {
2881                 device_printf(dev, "Unable to allocate TX DMA tag\n");
2882                 goto fail;
2883         }
2884
2885         if (!(txr->tx_buffers =
2886             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2887             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2888                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2889                 error = ENOMEM;
2890                 goto fail;
2891         }
2892
2893         /* Create the descriptor buffer dma maps */
2894         txbuf = txr->tx_buffers;
2895         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2896                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2897                 if (error != 0) {
2898                         device_printf(dev, "Unable to create TX DMA map\n");
2899                         goto fail;
2900                 }
2901         }
2902
2903         return 0;
2904 fail:
2905         /* Free everything; this handles the case where setup failed partway */
2906         igb_free_transmit_structures(adapter);
2907         return (error);
2908 }
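/*
** The tag created above bounds every mapping made with it: one
** transmit chain may load as at most IGB_MAX_SCATTER segments of
** at most PAGE_SIZE bytes each, up to IGB_TSO_SIZE bytes total.
** A bus_dmamap_load_mbuf_sg() that exceeds those limits fails
** with EFBIG; a typical recovery in a transmit path (sketch with
** hypothetical variable names) is to defragment and retry:
**
**	if (error == EFBIG) {
**		struct mbuf *m = m_defrag(*m_headp, M_DONTWAIT);
**		if (m == NULL)
**			return (ENOBUFS);
**		*m_headp = m;
**	}
*/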
2909
2910 /*********************************************************************
2911  *
2912  *  Initialize a transmit ring.
2913  *
2914  **********************************************************************/
2915 static void
2916 igb_setup_transmit_ring(struct tx_ring *txr)
2917 {
2918         struct adapter *adapter = txr->adapter;
2919         struct igb_tx_buffer *txbuf;
2920         int i;
2921
2922         /* Clear the old descriptor contents */
2923         bzero((void *)txr->tx_base,
2924               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2925         /* Reset indices */
2926         txr->next_avail_desc = 0;
2927         txr->next_to_clean = 0;
2928
2929         /* Free any existing tx buffers. */
2930         txbuf = txr->tx_buffers;
2931         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2932                 if (txbuf->m_head != NULL) {
2933                         bus_dmamap_sync(txr->txtag, txbuf->map,
2934                             BUS_DMASYNC_POSTWRITE);
2935                         bus_dmamap_unload(txr->txtag, txbuf->map);
2936                         m_freem(txbuf->m_head);
2937                         txbuf->m_head = NULL;
2938                 }
2939                 /* clear the watch index */
2940                 txbuf->next_eop = -1;
2941         }
2942
2943         /* Set number of descriptors available */
2944         txr->tx_avail = adapter->num_tx_desc;
2945
2946         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2947             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2948
2949 }
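/*
** The next_eop value reset above is the "watch index" that
** igb_txeof keys on: -1 means the slot holds no end-of-packet
** boundary, so the cleaner stops there instead of chasing stale
** descriptors.
*/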
2950
2951 /*********************************************************************
2952  *
2953  *  Initialize all transmit rings.
2954  *
2955  **********************************************************************/
2956 static void
2957 igb_setup_transmit_structures(struct adapter *adapter)
2958 {
2959         struct tx_ring *txr = adapter->tx_rings;
2960
2961         for (int i = 0; i < adapter->num_queues; i++, txr++)
2962                 igb_setup_transmit_ring(txr);
2963
2964         return;
2965 }
2966
2967 /*********************************************************************
2968  *
2969  *  Enable transmit unit.
2970  *
2971  **********************************************************************/
2972 static void
2973 igb_initialize_transmit_units(struct adapter *adapter)
2974 {
2975         struct tx_ring  *txr = adapter->tx_rings;
2976         u32             tctl, txdctl;
2977
2978         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2979
2980         /* Setup the Base and Length of the Tx Descriptor Rings */
2981         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2982                 u64 bus_addr = txr->txdma.dma_paddr;
2983
2984                 E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2985                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2986                 E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2987                     (uint32_t)(bus_addr >> 32));
2988                 E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2989                     (uint32_t)bus_addr);
2990
2991                 /* Setup the HW Tx Head and Tail descriptor pointers */
2992                 E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2993                 E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2994
2995                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
2996                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2997                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2998
2999                 /* Setup Transmit Descriptor Base Settings */   
3000                 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3001
3002                 txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
3003                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3004                 E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
3005         }
3006
3007         /* Program the Transmit Control Register */
3008         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3009         tctl &= ~E1000_TCTL_CT;
3010         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3011                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3012
3013         e1000_config_collision_dist(&adapter->hw);
3014
3015         /* This write will effectively turn on the transmit unit. */
3016         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3017
3018 }
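/*
** Worked example of the TDBAH/TDBAL split above for a hypothetical
** ring physical address:
**
**	u64 bus_addr = 0x0000000123456000ULL;
**	(u32)(bus_addr >> 32) == 0x00000001   -> TDBAH(i)
**	(u32)bus_addr         == 0x23456000   -> TDBAL(i)
**
** The IGB_DBA_ALIGN-aligned descriptor allocation keeps the low
** bits of TDBAL within the hardware's alignment requirement.
*/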
3019
3020 /*********************************************************************
3021  *
3022  *  Free all transmit rings.
3023  *
3024  **********************************************************************/
3025 static void
3026 igb_free_transmit_structures(struct adapter *adapter)
3027 {
3028         struct tx_ring *txr = adapter->tx_rings;
3029
3030         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3031                 IGB_TX_LOCK(txr);
3032                 igb_free_transmit_buffers(txr);
3033                 igb_dma_free(adapter, &txr->txdma);
3034                 IGB_TX_UNLOCK(txr);
3035                 IGB_TX_LOCK_DESTROY(txr);
3036         }
3037         free(adapter->tx_rings, M_DEVBUF);
3038 }
3039
3040 /*********************************************************************
3041  *
3042  *  Free transmit ring related data structures.
3043  *
3044  **********************************************************************/
3045 static void
3046 igb_free_transmit_buffers(struct tx_ring *txr)
3047 {
3048         struct adapter *adapter = txr->adapter;
3049         struct igb_tx_buffer *tx_buffer;
3050         int             i;
3051
3052         INIT_DEBUGOUT("igb_free_transmit_buffers: begin");
3053
3054         if (txr->tx_buffers == NULL)
3055                 return;
3056
3057         tx_buffer = txr->tx_buffers;
3058         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3059                 if (tx_buffer->m_head != NULL) {
3060                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3061                             BUS_DMASYNC_POSTWRITE);
3062                         bus_dmamap_unload(txr->txtag,
3063                             tx_buffer->map);
3064                         m_freem(tx_buffer->m_head);
3065                         tx_buffer->m_head = NULL;
3066                         if (tx_buffer->map != NULL) {
3067                                 bus_dmamap_destroy(txr->txtag,
3068                                     tx_buffer->map);
3069                                 tx_buffer->map = NULL;
3070                         }
3071                 } else if (tx_buffer->map != NULL) {
3072                         bus_dmamap_unload(txr->txtag,
3073                             tx_buffer->map);
3074                         bus_dmamap_destroy(txr->txtag,
3075                             tx_buffer->map);
3076                         tx_buffer->map = NULL;
3077                 }
3078         }
3079 #if __FreeBSD_version >= 800000
3080         if (txr->br != NULL)
3081                 buf_ring_free(txr->br, M_DEVBUF);
3082 #endif
3083         if (txr->tx_buffers != NULL) {
3084                 free(txr->tx_buffers, M_DEVBUF);
3085                 txr->tx_buffers = NULL;
3086         }
3087         if (txr->txtag != NULL) {
3088                 bus_dma_tag_destroy(txr->txtag);
3089                 txr->txtag = NULL;
3090         }
3091         return;
3092 }
3093
3094 /**********************************************************************
3095  *
3096  *  Setup work for hardware segmentation offload (TSO) on
3097  *  adapters using advanced tx descriptors (82575)
3098  *
3099  **********************************************************************/
3100 static boolean_t
3101 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3102 {
3103         struct adapter *adapter = txr->adapter;
3104         struct e1000_adv_tx_context_desc *TXD;
3105         struct igb_tx_buffer        *tx_buffer;
3106         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3107         u32 mss_l4len_idx = 0;
3108         u16 vtag = 0;
3109         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3110         struct ether_vlan_header *eh;
3111         struct ip *ip;
3112         struct tcphdr *th;
3113
3114
3115         /*
3116          * Determine where frame payload starts.
3117          * Jump over vlan headers if already present
3118          */
3119         eh = mtod(mp, struct ether_vlan_header *);
3120         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3121                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3122         else
3123                 ehdrlen = ETHER_HDR_LEN;
3124
3125         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3126         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3127                 return FALSE;
3128
3129         /* Only supports IPV4 for now */
3130         ctxd = txr->next_avail_desc;
3131         tx_buffer = &txr->tx_buffers[ctxd];
3132         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3133
3134         ip = (struct ip *)(mp->m_data + ehdrlen);
3135         if (ip->ip_p != IPPROTO_TCP)
3136                 return FALSE;   /* not TCP, nothing to offload */
3137         ip->ip_sum = 0;
3138         ip_hlen = ip->ip_hl << 2;
3139         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3140         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3141             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3142         tcp_hlen = th->th_off << 2;
3143         /*
3144          * Calculate the header length; it is used
3145          * in the transmit desc in igb_xmit
3146          */
3147         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3148
3149         /* VLAN MACLEN IPLEN */
3150         if (mp->m_flags & M_VLANTAG) {
3151                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3152                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3153         }
3154
3155         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3156         vlan_macip_lens |= ip_hlen;
3157         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3158
3159         /* ADV DTYPE TUCMD */
3160         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3161         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3162         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3163         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3164
3165         /* MSS L4LEN IDX */
3166         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3167         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3168         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3169
3170         TXD->seqnum_seed = htole32(0);
3171         tx_buffer->m_head = NULL;
3172         tx_buffer->next_eop = -1;
3173
3174         if (++ctxd == adapter->num_tx_desc)
3175                 ctxd = 0;
3176
3177         txr->tx_avail--;
3178         txr->next_avail_desc = ctxd;
3179         return TRUE;
3180 }
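/*
** Worked example of the *hdrlen computed above for a hypothetical
** untagged IPv4/TCP frame with no IP or TCP options:
**
**	ehdrlen  = ETHER_HDR_LEN         = 14
**	ip_hlen  = sizeof(struct ip)     = 20
**	tcp_hlen = sizeof(struct tcphdr) = 20
**	*hdrlen  = 14 + 20 + 20          = 54
**
** Note that the pseudo-header checksum seeded into th_sum omits
** the length field: for TSO the hardware inserts the per-segment
** length as it carves the payload into MSS-sized frames.
*/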
3181
3182
3183 /*********************************************************************
3184  *
3185  *  Context Descriptor setup for VLAN or CSUM
3186  *
3187  **********************************************************************/
3188
3189 static bool
3190 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3191 {
3192         struct adapter *adapter = txr->adapter;
3193         struct e1000_adv_tx_context_desc *TXD;
3194         struct igb_tx_buffer        *tx_buffer;
3195         uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3196         struct ether_vlan_header *eh;
3197         struct ip *ip = NULL;
3198         struct ip6_hdr *ip6;
3199         int  ehdrlen, ctxd, ip_hlen = 0;
3200         u16     etype, vtag = 0;
3201         u8      ipproto = 0;
3202         bool    offload = TRUE;
3203
3204         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3205                 offload = FALSE;
3206
3207         ctxd = txr->next_avail_desc;
3208         tx_buffer = &txr->tx_buffers[ctxd];
3209         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3210
3211         /*
3212         ** In advanced descriptors the vlan tag must
3213         ** be placed into the context descriptor; thus
3214         ** we may need one even without checksum offload.
3215         */
3216         if (mp->m_flags & M_VLANTAG) {
3217                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3218                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3219         } else if (offload == FALSE)
3220                 return FALSE;
3221
3222         /*
3223          * Determine where frame payload starts.
3224          * Jump over vlan headers if already present,
3225          * helpful for QinQ too.
3226          */
3227         eh = mtod(mp, struct ether_vlan_header *);
3228         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3229                 etype = ntohs(eh->evl_proto);
3230                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3231         } else {
3232                 etype = ntohs(eh->evl_encap_proto);
3233                 ehdrlen = ETHER_HDR_LEN;
3234         }
3235
3236         /* Set the ether header length */
3237         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3238
3239         switch (etype) {
3240                 case ETHERTYPE_IP:
3241                         ip = (struct ip *)(mp->m_data + ehdrlen);
3242                         ip_hlen = ip->ip_hl << 2;
3243                         if (mp->m_len < ehdrlen + ip_hlen) {
3244                                 offload = FALSE;
3245                                 break;
3246                         }
3247                         ipproto = ip->ip_p;
3248                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3249                         break;
3250                 case ETHERTYPE_IPV6:
3251                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3252                         ip_hlen = sizeof(struct ip6_hdr);
3253                         if (mp->m_len < ehdrlen + ip_hlen)
3254                                 return (FALSE);
3255                         ipproto = ip6->ip6_nxt;
3256                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3257                         break;
3258                 default:
3259                         offload = FALSE;
3260                         break;
3261         }
3262
3263         vlan_macip_lens |= ip_hlen;
3264         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3265
3266         switch (ipproto) {
3267                 case IPPROTO_TCP:
3268                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3269                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3270                         break;
3271                 case IPPROTO_UDP:
3272                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3273                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3274                         break;
3275 #if __FreeBSD_version >= 800000
3276                 case IPPROTO_SCTP:
3277                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3278                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3279                         break;
3280 #endif
3281                 default:
3282                         offload = FALSE;
3283                         break;
3284         }
3285
3286         /* Now copy bits into descriptor */
3287         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3288         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3289         TXD->seqnum_seed = htole32(0);
3290         TXD->mss_l4len_idx = htole32(0);
3291
3292         tx_buffer->m_head = NULL;
3293         tx_buffer->next_eop = -1;
3294
3295         /* We've consumed the first desc, adjust counters */
3296         if (++ctxd == adapter->num_tx_desc)
3297                 ctxd = 0;
3298         txr->next_avail_desc = ctxd;
3299         --txr->tx_avail;
3300
3301         return (offload);
3302 }
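/*
** Sketch of the vlan_macip_lens packing done above, with field
** positions given by the ADVTXD shift constants (hypothetical
** tagged IPv4 frame):
**
**	vtag    << E1000_ADVTXD_VLAN_SHIFT     VLAN tag
**	18      << E1000_ADVTXD_MACLEN_SHIFT   MAC header length
**	20                                     IP header length
**
** The context descriptor consumes a ring slot (tx_avail is
** decremented) even though it carries no packet data.
*/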
3303
3304
3305 /**********************************************************************
3306  *
3307  *  Examine each tx_buffer in the used queue. If the hardware is done
3308  *  processing the packet then free associated resources. The
3309  *  tx_buffer is put back on the free queue.
3310  *
3311  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3312  **********************************************************************/
3313 static bool
3314 igb_txeof(struct tx_ring *txr)
3315 {
3316         struct adapter  *adapter = txr->adapter;
3317         int first, last, done, num_avail;
3318         u32     cleaned = 0;
3319         struct igb_tx_buffer *tx_buffer;
3320         struct e1000_tx_desc   *tx_desc, *eop_desc;
3321         struct ifnet   *ifp = adapter->ifp;
3322
3323         IGB_TX_LOCK_ASSERT(txr);
3324
3325         if (txr->tx_avail == adapter->num_tx_desc)
3326                 return FALSE;
3327
3328         num_avail = txr->tx_avail;
3329         first = txr->next_to_clean;
3330         tx_desc = &txr->tx_base[first];
3331         tx_buffer = &txr->tx_buffers[first];
3332         last = tx_buffer->next_eop;
3333         eop_desc = &txr->tx_base[last];
3334
3335         /*
3336          * Get the index of the first descriptor
3337          * AFTER the EOP of the first packet; that
3338          * way the inner while loop can use a
3339          * simple inequality comparison.
3340          */
3341         if (++last == adapter->num_tx_desc)
3342                 last = 0;
3343         done = last;
3344
3345         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3346             BUS_DMASYNC_POSTREAD);
3347
3348         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3349                 /* We clean the range of the packet */
3350                 while (first != done) {
3351                         tx_desc->upper.data = 0;
3352                         tx_desc->lower.data = 0;
3353                         tx_desc->buffer_addr = 0;
3354                         ++num_avail; ++cleaned;
3355
3356                         if (tx_buffer->m_head) {
3357                                 ifp->if_opackets++;
3358                                 bus_dmamap_sync(txr->txtag,
3359                                     tx_buffer->map,
3360                                     BUS_DMASYNC_POSTWRITE);
3361                                 bus_dmamap_unload(txr->txtag,
3362                                     tx_buffer->map);
3363
3364                                 m_freem(tx_buffer->m_head);
3365                                 tx_buffer->m_head = NULL;
3366                         }
3367                         tx_buffer->next_eop = -1;
3368
3369                         if (++first == adapter->num_tx_desc)
3370                                 first = 0;
3371
3372                         tx_buffer = &txr->tx_buffers[first];
3373                         tx_desc = &txr->tx_base[first];
3374                 }
3375                 /* See if we can continue to the next packet */
3376                 last = tx_buffer->next_eop;
3377                 if (last != -1) {
3378                         eop_desc = &txr->tx_base[last];
3379                         /* Get new done point */
3380                         if (++last == adapter->num_tx_desc) last = 0;
3381                         done = last;
3382                 } else
3383                         break;
3384         }
3385         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3386             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3387
3388         txr->next_to_clean = first;
3389
3390         /*
3391          * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3392          * that it is OK to send packets.
3393          * If there are no pending descriptors, clear the timeout. Otherwise,
3394          * if some descriptors have been freed, restart the timeout.
3395          */
3396         if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {                
3397                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3398                 /* All clean, turn off the timer */
3399                 if (num_avail == adapter->num_tx_desc) {
3400                         txr->watchdog_timer = 0;
3401                         txr->tx_avail = num_avail;
3402                         return FALSE;
3403                 }
3404         }
3405
3406         /* Some cleaned, reset the timer */
3407         if (cleaned)
3408                 txr->watchdog_timer = IGB_TX_TIMEOUT;
3409         txr->tx_avail = num_avail;
3410         return TRUE;
3411 }
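/*
** Worked example of the 'done' index logic above on a hypothetical
** 8-descriptor ring: if next_to_clean (first) is 6 and the packet's
** EOP sits in slot 1, done becomes 2 and the inner loop visits
** slots 6, 7, 0 and 1 -- a plain "first != done" test handles the
** wrap with no range arithmetic.
*/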
3412
3413
3414 /*********************************************************************
3415  *
3416  *  Setup descriptor buffer(s) from system mbuf buffer pools.
3417  *              i - designates the descriptor index within the ring
3418  *              clean - tells the function whether to update
3419  *                      the header, the packet buffer, or both.
3420  *
3421  **********************************************************************/
3422 static int
3423 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3424 {
3425         struct adapter          *adapter = rxr->adapter;
3426         struct mbuf             *mh, *mp;
3427         bus_dma_segment_t       seg[2];
3428         bus_dmamap_t            map;
3429         struct igb_rx_buffer    *rx_buffer;
3430         int                     error, nsegs;
3431         int                     merr = 0;
3432
3433
3434         rx_buffer = &rxr->rx_buffers[i];
3435
3436         /* First get our header and payload mbuf */
3437         if (clean & IGB_CLEAN_HEADER) {
3438                 mh = m_gethdr(M_DONTWAIT, MT_DATA);
3439                 if (mh == NULL)
3440                         goto remap;
3441         } else  /* reuse */
3442                 mh = rxr->rx_buffers[i].m_head;
3443
3444         mh->m_len = MHLEN;
3445         mh->m_flags |= M_PKTHDR;
3446
3447         if (clean & IGB_CLEAN_PAYLOAD) {
3448                 mp = m_getjcl(M_DONTWAIT, MT_DATA,
3449                     M_PKTHDR, adapter->rx_mbuf_sz);
3450                 if (mp == NULL)
3451                         goto remap;
3452                 mp->m_len = adapter->rx_mbuf_sz;
3453                 mp->m_flags &= ~M_PKTHDR;
3454         } else {        /* reusing */
3455                 mp = rxr->rx_buffers[i].m_pack;
3456                 mp->m_len = adapter->rx_mbuf_sz;
3457                 mp->m_flags &= ~M_PKTHDR;
3458         }
3459         /*
3460         ** Need to create a chain for the following
3461         ** dmamap call at this point.
3462         */
3463         mh->m_next = mp;
3464         mh->m_pkthdr.len = mh->m_len + mp->m_len;
3465
3466         /* Get the memory mapping */
3467         error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3468             rxr->rx_spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3469         if (error != 0) {
3470                 device_printf(adapter->dev, "igb_get_buf: dmamap load failure - %d\n", error);
3471                 m_free(mh);
3472                 return (error);
3473         }
3474
3475         /* Unload old mapping and update buffer struct */
3476         if (rx_buffer->m_head != NULL)
3477                 bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3478         map = rx_buffer->map;
3479         rx_buffer->map = rxr->rx_spare_map;
3480         rxr->rx_spare_map = map;
3481         rx_buffer->m_head = mh;
3482         rx_buffer->m_pack = mp;
3483         bus_dmamap_sync(rxr->rxtag,
3484             rx_buffer->map, BUS_DMASYNC_PREREAD);
3485
3486         /* Update descriptor */
3487         rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3488         rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3489
3490         return (0);
3491
3492         /*
3493         ** If we get here, we have an mbuf resource
3494         ** issue, so we discard the incoming packet
3495         ** and attempt to reuse existing mbufs on the
3496         ** next pass through the ring.  To do so we
3497         ** must fix up the descriptor whose address
3498         ** was clobbered with writeback info.
3499         */
3500 remap:
3501         adapter->mbuf_header_failed++;
3502         merr = ENOBUFS;
3503         /* Is there a reusable buffer? */
3504         mh = rxr->rx_buffers[i].m_head;
3505         if (mh == NULL) /* Nope, init error */
3506                 return (merr);
3507         mp = rxr->rx_buffers[i].m_pack;
3508         if (mp == NULL) /* Nope, init error */
3509                 return (merr);
3510         /* Get our old mapping */
3511         rx_buffer = &rxr->rx_buffers[i];
3512         error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3513             rx_buffer->map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3514         if (error != 0) {
3515                 /* We really have a problem */
3516                 m_free(mh);
3517                 return (error);
3518         }
3519         /* Now fix the descriptor as needed */
3520         rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3521         rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3522         return (merr);
3523 }
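/*
** The two-mbuf chain built above is expected to load as exactly
** two DMA segments: seg[0] (the MHLEN header mbuf) backs
** read.hdr_addr and seg[1] (the cluster) backs read.pkt_addr.
** A hypothetical sanity check would read:
**
**	KASSERT(nsegs == 2, ("igb_get_buf: nsegs %d != 2", nsegs));
*/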
3524
3525
3526 /*********************************************************************
3527  *
3528  *  Allocate memory for rx_buffer structures. Since we use one
3529  *  rx_buffer per received packet, the maximum number of rx_buffer's
3530  *  that we'll need is equal to the number of receive descriptors
3531  *  that we've allocated.
3532  *
3533  **********************************************************************/
3534 static int
3535 igb_allocate_receive_buffers(struct rx_ring *rxr)
3536 {
3537         struct  adapter         *adapter = rxr->adapter;
3538         device_t                dev = adapter->dev;
3539         struct igb_rx_buffer    *rxbuf;
3540         int                     i, bsize, error;
3541
3542         bsize = sizeof(struct igb_rx_buffer) * adapter->num_rx_desc;
3543         if (!(rxr->rx_buffers =
3544             (struct igb_rx_buffer *) malloc(bsize,
3545             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3546                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3547                 error = ENOMEM;
3548                 goto fail;
3549         }
3550
3551         /*
3552         ** The tag is made to accommodate the largest buffer size
3553         ** with packet split (hence the two segments), even though
3554         ** it may not always use both.
3555         */
3556         if ((error = bus_dma_tag_create(NULL,           /* parent */
3557                                    1, 0,                /* alignment, bounds */
3558                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3559                                    BUS_SPACE_MAXADDR,   /* highaddr */
3560                                    NULL, NULL,          /* filter, filterarg */
3561                                    MJUM16BYTES,         /* maxsize */
3562                                    2,                   /* nsegments */
3563                                    MJUMPAGESIZE,        /* maxsegsize */
3564                                    0,                   /* flags */
3565                                    NULL,                /* lockfunc */
3566                                    NULL,                /* lockfuncarg */
3567                                    &rxr->rxtag))) {
3568                 device_printf(dev, "Unable to create RX DMA tag\n");
3569                 goto fail;
3570         }
3571
3572         /* Create the spare map (used by getbuf) */
3573         error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3574              &rxr->rx_spare_map);
3575         if (error) {
3576                 device_printf(dev,
3577                     "%s: bus_dmamap_create header spare failed: %d\n",
3578                     __func__, error);
3579                 goto fail;
3580         }
3581
3582         for (i = 0; i < adapter->num_rx_desc; i++) {
3583                 rxbuf = &rxr->rx_buffers[i];
3584                 error = bus_dmamap_create(rxr->rxtag,
3585                     BUS_DMA_NOWAIT, &rxbuf->map);
3586                 if (error) {
3587                         device_printf(dev, "Unable to create RX DMA maps\n");
3588                         goto fail;
3589                 }
3590         }
3591
3592         return (0);
3593
3594 fail:
3595         /* Frees all, but can handle partial completion */
3596         igb_free_receive_structures(adapter);
3597         return (error);
3598 }
3599
3600 /*********************************************************************
3601  *
3602  *  Initialize a receive ring and its buffers.
3603  *
3604  **********************************************************************/
3605 static int
3606 igb_setup_receive_ring(struct rx_ring *rxr)
3607 {
3608         struct  adapter         *adapter;
3609         struct  ifnet           *ifp;
3610         device_t                dev;
3611         struct igb_rx_buffer    *rxbuf;
3612         struct lro_ctrl         *lro = &rxr->lro;
3613         int                     j, rsize;
3614
3615         adapter = rxr->adapter;
3616         dev = adapter->dev;
3617         ifp = adapter->ifp;
3618         rxr->lro_enabled = FALSE;
3619         rxr->hdr_split = FALSE;
3620
3621         /* Clear the ring contents */
3622         rsize = roundup2(adapter->num_rx_desc *
3623             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3624         bzero((void *)rxr->rx_base, rsize);
3625
3626         /*
3627         ** Free current RX buffer structures and their mbufs
3628         */
3629         for (int i = 0; i < adapter->num_rx_desc; i++) {
3630                 rxbuf = &rxr->rx_buffers[i];
3631                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3632                     BUS_DMASYNC_POSTREAD);
3633                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3634                 if (rxbuf->m_head) {
3635                         rxbuf->m_head->m_next = rxbuf->m_pack;
3636                         m_freem(rxbuf->m_head);
3637                 }
3638                 rxbuf->m_head = NULL;
3639                 rxbuf->m_pack = NULL;
3640         }
3641
3642         /* Next replenish the ring */
3643         for (j = 0; j < adapter->num_rx_desc; j++) {
3644                 if (igb_get_buf(rxr, j, IGB_CLEAN_BOTH) == ENOBUFS) {
3645                         rxr->rx_buffers[j].m_head = NULL;
3646                         rxr->rx_buffers[j].m_pack = NULL;
3647                         rxr->rx_base[j].read.hdr_addr = 0;
3648                         rxr->rx_base[j].read.pkt_addr = 0;
3649                         goto fail;
3650                 }
3651         }
3652
3653         /* Setup our descriptor indices */
3654         rxr->next_to_check = 0;
3655         rxr->last_cleaned = 0;
3656
3657         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3658             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3659
3660         /*
3661         ** Now set up the LRO interface; we
3662         ** also only do header split when LRO
3663         ** is enabled, since split headers are
3664         ** often undesirable otherwise.
3665         */
3666         if (ifp->if_capenable & IFCAP_LRO) {
3667                 int err = tcp_lro_init(lro);
3668                 if (err) {
3669                         device_printf(dev, "LRO Initialization failed!\n");
3670                         goto fail;
3671                 }
3672                 INIT_DEBUGOUT("RX LRO Initialized\n");
3673                 rxr->lro_enabled = TRUE;
3674                 rxr->hdr_split = TRUE;
3675                 lro->ifp = adapter->ifp;
3676         }
3677
3678         return (0);
3679 fail:
3680         /*
3681          * We need to clean up any buffers allocated
3682          * so far, 'j' is the failing index.
3683          */
3684         for (int i = 0; i < j; i++) {
3685                 rxbuf = &rxr->rx_buffers[i];
3686                 if (rxbuf->m_head != NULL) {
3687                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3688                             BUS_DMASYNC_POSTREAD);
3689                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3690                         m_freem(rxbuf->m_head);
3691                         rxbuf->m_head = NULL;
3692                 }
3693         }
3694         return (ENOBUFS);
3695 }
3696
3697 /*********************************************************************
3698  *
3699  *  Initialize all receive rings.
3700  *
3701  **********************************************************************/
3702 static int
3703 igb_setup_receive_structures(struct adapter *adapter)
3704 {
3705         struct rx_ring *rxr = adapter->rx_rings;
3706         int i, j;
3707
3708         for (i = 0; i < adapter->num_queues; i++, rxr++)
3709                 if (igb_setup_receive_ring(rxr))
3710                         goto fail;
3711
3712         return (0);
3713 fail:
3714         /*
3715          * Free RX buffers allocated so far, we will only handle
3716          * the rings that completed, the failing case will have
3717          * cleaned up for itself. The value of 'i' will be the
3718          * failed ring so we must pre-decrement it.
3719          */
3720         rxr = adapter->rx_rings;
3721         for (--i; i >= 0; i--, rxr++) {
3722                 for (j = 0; j < adapter->num_rx_desc; j++) {
3723                         struct igb_rx_buffer *rxbuf;
3724                         rxbuf = &rxr->rx_buffers[j];
3725                         if (rxbuf->m_head != NULL) {
3726                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3727                                   BUS_DMASYNC_POSTREAD);
3728                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3729                                 m_freem(rxbuf->m_head);
3730                                 rxbuf->m_head = NULL;
3731                         }
3732                 }
3733         }
3734
3735         return (ENOBUFS);
3736 }
3737
3738 /*********************************************************************
3739  *
3740  *  Enable receive unit.
3741  *
3742  **********************************************************************/
3743 static void
3744 igb_initialize_receive_units(struct adapter *adapter)
3745 {
3746         struct rx_ring  *rxr = adapter->rx_rings;
3747         struct ifnet    *ifp = adapter->ifp;
3748         u32             rctl, rxcsum, psize, srrctl = 0;
3749
3750         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3751
3752         /*
3753          * Make sure receives are disabled while setting
3754          * up the descriptor ring
3755          */
3756         rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3757         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3758
3759         /*
3760         ** Set up for header split
3761         */
3762         if (rxr->hdr_split) {
3763                 /* Use a standard mbuf for the header */
3764                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3765                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3766         } else
3767                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3768
3769         /*
3770         ** Set up for jumbo frames
3771         */
3772         if (ifp->if_mtu > ETHERMTU) {
3773                 rctl |= E1000_RCTL_LPE;
3774                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3775                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3776
3777                 /* Set maximum packet len */
3778                 psize = adapter->max_frame_size;
3779                 /* are we on a vlan? */
3780                 if (adapter->ifp->if_vlantrunk != NULL)
3781                         psize += VLAN_TAG_SIZE;
3782                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3783         } else {
3784                 rctl &= ~E1000_RCTL_LPE;
3785                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3786                 rctl |= E1000_RCTL_SZ_2048;
3787         }
3788
3789         /* Setup the Base and Length of the Rx Descriptor Rings */
3790         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3791                 u64 bus_addr = rxr->rxdma.dma_paddr;
3792                 u32 rxdctl;
3793
3794                 E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3795                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3796                 E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3797                     (uint32_t)(bus_addr >> 32));
3798                 E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3799                     (uint32_t)bus_addr);
3800                 E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3801                 /* Enable this Queue */
3802                 rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3803                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3804                 rxdctl &= 0xFFF00000;
3805                 rxdctl |= IGB_RX_PTHRESH;
3806                 rxdctl |= IGB_RX_HTHRESH << 8;
3807                 rxdctl |= IGB_RX_WTHRESH << 16;
3808                 E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3809         }
3810
3811         /*
3812         ** Setup for RX MultiQueue
3813         */
3814         rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3815         if (adapter->num_queues > 1) {
3816                 u32 random[10], mrqc, shift = 0;
3817                 union igb_reta {
3818                         u32 dword;
3819                         u8  bytes[4];
3820                 } reta;
3821
3822                 arc4rand(&random, sizeof(random), 0);
3823                 if (adapter->hw.mac.type == e1000_82575)
3824                         shift = 6;
3825                 /* Populate the 128-entry RSS redirection table (RETA) */
3826                 for (int i = 0; i < 128; i++) {
3827                         reta.bytes[i & 3] =
3828                             (i % adapter->num_queues) << shift;
3829                         if ((i & 3) == 3)
3830                                 E1000_WRITE_REG(&adapter->hw,
3831                                     E1000_RETA(i >> 2), reta.dword);
3832                 }
3833                 /* Now fill in hash table */
3834                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3835                 for (int i = 0; i < 10; i++)
3836                         E1000_WRITE_REG_ARRAY(&adapter->hw,
3837                             E1000_RSSRK(0), i, random[i]);
3838
3839                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3840                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
3841                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3842                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
3843                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3844                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
3845                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3846                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3847
3848                 E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3849
3850                 /*
3851                 ** NOTE: Receive Full-Packet Checksum Offload 
3852                 ** is mutually exclusive with Multiqueue. However
3853                 ** this is not the same as TCP/IP checksums which
3854                 ** still work.
3855                 */
3856                 rxcsum |= E1000_RXCSUM_PCSD;
3857 #if __FreeBSD_version >= 800000
3858                 /* For SCTP Offload */
3859                 if ((adapter->hw.mac.type == e1000_82576)
3860                     && (ifp->if_capenable & IFCAP_RXCSUM))
3861                         rxcsum |= E1000_RXCSUM_CRCOFL;
3862 #endif
3863         } else {
3864                 /* Non RSS setup */
3865                 if (ifp->if_capenable & IFCAP_RXCSUM) {
3866                         rxcsum |= E1000_RXCSUM_IPPCSE;
3867 #if __FreeBSD_version >= 800000
3868                         if (adapter->hw.mac.type == e1000_82576)
3869                                 rxcsum |= E1000_RXCSUM_CRCOFL;
3870 #endif
3871                 } else
3872                         rxcsum &= ~E1000_RXCSUM_TUOFL;
3873         }
3874         E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3875
3876         /* Setup the Receive Control Register */
3877         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3878         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3879                    E1000_RCTL_RDMTS_HALF |
3880                    (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3881
3882         /* Make sure VLAN Filters are off */
3883         rctl &= ~E1000_RCTL_VFE;
3884         /* Don't store bad packets */
3885         rctl &= ~E1000_RCTL_SBP;
3886
3887         /* Enable Receives */
3888         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3889
3890         /*
3891          * Setup the HW Rx Head and Tail Descriptor Pointers
3892          *   - needs to be after enable
3893          */
3894         for (int i = 0; i < adapter->num_queues; i++) {
3895                 E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3896                 E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3897                      adapter->num_rx_desc - 1);
3898         }
3899         return;
3900 }
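/*
** Worked example of the RETA loop above with num_queues = 2: the
** byte pattern cycles 0,1,0,1,..., so on a little-endian host the
** first dword written to E1000_RETA(0) is 0x01000100 with shift = 0
** (82576) or 0x40004000 with shift = 6 (82575).
*/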
3901
3902 /*********************************************************************
3903  *
3904  *  Free receive rings.
3905  *
3906  **********************************************************************/
3907 static void
3908 igb_free_receive_structures(struct adapter *adapter)
3909 {
3910         struct rx_ring *rxr = adapter->rx_rings;
3911
3912         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3913                 struct lro_ctrl *lro = &rxr->lro;
3914                 igb_free_receive_buffers(rxr);
3915                 tcp_lro_free(lro);
3916                 igb_dma_free(adapter, &rxr->rxdma);
3917         }
3918
3919         free(adapter->rx_rings, M_DEVBUF);
3920 }
3921
3922 /*********************************************************************
3923  *
3924  *  Free receive ring data structures.
3925  *
3926  **********************************************************************/
3927 static void
3928 igb_free_receive_buffers(struct rx_ring *rxr)
3929 {
3930         struct adapter  *adapter = rxr->adapter;
3931         struct igb_rx_buffer *rx_buffer;
3932
3933         INIT_DEBUGOUT("igb_free_receive_buffers: begin");
3934
3935         if (rxr->rx_spare_map) {
3936                 bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3937                 rxr->rx_spare_map = NULL;
3938         }
3939
3940         /* Cleanup any existing buffers */
3941         if (rxr->rx_buffers != NULL) {
3942                 rx_buffer = &rxr->rx_buffers[0];
3943                 for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3944                         if (rx_buffer->m_head != NULL) {
3945                                 bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3946                                     BUS_DMASYNC_POSTREAD);
3947                                 bus_dmamap_unload(rxr->rxtag,
3948                                     rx_buffer->map);
3949                                 m_freem(rx_buffer->m_head);
3950                                 rx_buffer->m_head = NULL;
3951                         } else if (rx_buffer->map != NULL)
3952                                 bus_dmamap_unload(rxr->rxtag,
3953                                     rx_buffer->map);
3954                         if (rx_buffer->map != NULL) {
3955                                 bus_dmamap_destroy(rxr->rxtag,
3956                                     rx_buffer->map);
3957                                 rx_buffer->map = NULL;
3958                         }
3959                 }
3960         }
3961
3962         if (rxr->rx_buffers != NULL) {
3963                 free(rxr->rx_buffers, M_DEVBUF);
3964                 rxr->rx_buffers = NULL;
3965         }
3966
3967         if (rxr->rxtag != NULL) {
3968                 bus_dma_tag_destroy(rxr->rxtag);
3969                 rxr->rxtag = NULL;
3970         }
3971 }
3972 /*********************************************************************
3973  *
3974  *  This routine executes in interrupt context. It replenishes
3975  *  the mbufs in the descriptors and sends data which has been
3976  *  DMA'ed into host memory up to the network stack.
3977  *
3978  *  We loop at most count times if count is > 0, or until done if
3979  *  count < 0.
3980  *
3981  *  Return TRUE if more to clean, FALSE otherwise
3982  *********************************************************************/
3983 static bool
3984 igb_rxeof(struct rx_ring *rxr, int count)
3985 {
3986         struct adapter          *adapter = rxr->adapter;
3987         struct ifnet            *ifp;
3988         struct lro_ctrl         *lro = &rxr->lro;
3989         struct lro_entry        *queued;
3990         int                     i;
3991         u32                     staterr;
3992         union e1000_adv_rx_desc *cur;
3993
3994
3995         IGB_RX_LOCK(rxr);
3996         ifp = adapter->ifp;
3997         i = rxr->next_to_check;
3998         cur = &rxr->rx_base[i];
3999         staterr = cur->wb.upper.status_error;
4000
4001         if (!(staterr & E1000_RXD_STAT_DD)) {
4002                 IGB_RX_UNLOCK(rxr);
4003                 return FALSE;
4004         }
4005
4006         /* Sync the ring */
4007         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4008             BUS_DMASYNC_POSTREAD);
4009
4010         /* Main clean loop */
4011         while ((staterr & E1000_RXD_STAT_DD) &&
4012             (count != 0) &&
4013             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4014                 struct mbuf *sendmp, *mh, *mp;
4015                 u16 hlen, plen, hdr, ptype, len_adj, vtag;
4016                 u8 dopayload, accept_frame, eop;
4017  
4018                 accept_frame = 1;
4019                 hlen = plen = len_adj = vtag = 0;
4020                 sendmp = mh = mp = NULL;
4021                 ptype = (u16)(cur->wb.lower.lo_dword.data >> 4);
4022
4023                 /* Sync the buffers */
4024                 bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
4025                             BUS_DMASYNC_POSTREAD);
4026
4027                 /*
4028                 ** The way the hardware is configured to
4029                 ** split, it will ONLY use the header buffer
4030                 ** when header split is enabled, otherwise we
4031                 ** get normal behavior, ie, both header and
4032                 ** payload are DMA'd into the payload buffer.
4033                 **
4034                 ** The fmp test is to catch the case where a
4035                 ** packet spans multiple descriptors, in that
4036                 ** case only the first header is valid.
4037                 */
4038                 if ((rxr->hdr_split) && (rxr->fmp == NULL)) {
4039                         hdr = le16toh(cur->
4040                             wb.lower.lo_dword.hs_rss.hdr_info);
4041                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4042                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4043                         if (hlen > IGB_HDR_BUF)
4044                                 hlen = IGB_HDR_BUF;
4045                         plen = le16toh(cur->wb.upper.length);
4046                         /* Handle the header mbuf */
4047                         mh = rxr->rx_buffers[i].m_head;
4048                         mh->m_len = hlen;
4049                         dopayload = IGB_CLEAN_HEADER;
4050                         /*
4051                         ** Get the payload length, this
4052                         ** could be zero if its a small
4053                         ** packet.
4054                         */
4055                         if (plen) {
4056                                 mp = rxr->rx_buffers[i].m_pack;
4057                                 mp->m_len = plen;
4058                                 mp->m_next = NULL;
4059                                 mp->m_flags &= ~M_PKTHDR;
4060                                 mh->m_next = mp;
4061                                 mh->m_flags |= M_PKTHDR;
4062                                 dopayload = IGB_CLEAN_BOTH;
4063                                 rxr->rx_split_packets++;
4064                         } else {  /* small packets */
4065                                 mh->m_flags &= ~M_PKTHDR;
4066                                 mh->m_next = NULL;
4067                         }
4068                 } else {
4069                         /*
4070                         ** Either no header split, or a
4071                         ** secondary piece of a fragmented
4072                         ** split packet.
4073                         */
4074                         mh = rxr->rx_buffers[i].m_pack;
4075                         mh->m_flags |= M_PKTHDR;
4076                         mh->m_len = le16toh(cur->wb.upper.length);
4077                         dopayload = IGB_CLEAN_PAYLOAD;
4078                 }
4079
4080                 if (staterr & E1000_RXD_STAT_EOP) {
4081                         count--;
4082                         eop = 1;
4083                         /*
4084                         ** Strip CRC and account for frag
4085                         */
4086                         if (mp) { 
4087                                 if (mp->m_len < ETHER_CRC_LEN) {
4088                                         /* a frag, how much is left? */
4089                                         len_adj = ETHER_CRC_LEN - mp->m_len;
4090                                         mp->m_len = 0;
4091                                 } else
4092                                         mp->m_len -= ETHER_CRC_LEN;
4093                         } else { /* not split */
4094                                 if (mh->m_len < ETHER_CRC_LEN) {
4095                                         len_adj = ETHER_CRC_LEN - mh->m_len;
4096                                         mh->m_len = 0;
4097                                 } else
4098                                         mh->m_len -= ETHER_CRC_LEN;
4099                         }
4100                 } else 
4101                         eop = 0;
4102
4103                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
4104                         accept_frame = 0;
4105 #ifdef IGB_IEEE1588
4106                 /*
4107                 ** TODO: this Linux code needs to be converted to work here:
4108                 ** if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4109                 **         u64 regval;
4110                 **         u64 ns;
4111                 **         // Create an mtag and set it up
4112                 **         struct skb_shared_hwtstamps *shhwtstamps =
4113                 **                 skb_hwtstamps(skb);
4114                 **         rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4115                 **             "igb: no RX time stamp available for time stamped packet");
4116                 **         regval = rd32(E1000_RXSTMPL);
4117                 **         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4118                 **         // Do time conversion from the register
4119                 **         ns = timecounter_cyc2time(&adapter->clock, regval);
4120                 **         clocksync_update(&adapter->sync, ns);
4121                 **         memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4122                 **         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4123                 **         shhwtstamps->syststamp =
4124                 **                 clocksync_hw2sys(&adapter->sync, ns);
4125                 ** }
4126                 */
4127 #endif
4128                 if (accept_frame) {
4129                         /*
4130                         ** get_buf will overwrite the writeback
4131                         ** descriptor so save the VLAN tag now.
4132                         */
4133                         vtag = le16toh(cur->wb.upper.vlan);
4134                         if (igb_get_buf(rxr, i, dopayload) != 0) {
4135                                 ifp->if_iqdrops++;
4136                                 goto discard;
4137                         }
4138                         /* Initial frame - setup */
4139                         if (rxr->fmp == NULL) {
4140                                 mh->m_flags |= M_PKTHDR;
4141                                 mh->m_pkthdr.len = mh->m_len;
4142                                 rxr->fmp = mh; /* Store the first mbuf */
4143                                 rxr->lmp = mh;
4144                                 if (mp) { /* Add payload if split */
4145                                         mh->m_pkthdr.len += mp->m_len;
4146                                         rxr->lmp = mh->m_next;
4147                                 }
4148                         } else {
4149                                 /* Chain mbuf's together */
4150                                 mh->m_flags &= ~M_PKTHDR;
4151                                 rxr->lmp->m_next = mh;
4152                                 rxr->lmp = rxr->lmp->m_next;
4153                                 rxr->fmp->m_pkthdr.len += mh->m_len;
4154                                 /* Adjust for CRC frag */
4155                                 if (len_adj) {
4156                                         rxr->lmp->m_len -= len_adj;
4157                                         rxr->fmp->m_pkthdr.len -= len_adj;
4158                                 }
4159                         }
4160
4161                         if (eop) {
4162                                 bool sctp = ((ptype & 0x40) != 0);
4163                                 rxr->fmp->m_pkthdr.rcvif = ifp;
4164                                 ifp->if_ipackets++;
4165                                 rxr->rx_packets++;
4166                                 /* capture data for AIM */
4167                                 rxr->bytes += rxr->fmp->m_pkthdr.len;
4168                                 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4169
4170                                 igb_rx_checksum(staterr, rxr->fmp, sctp);
4171                                 if (staterr & E1000_RXD_STAT_VP) {
4172                                         rxr->fmp->m_pkthdr.ether_vtag = vtag;
4173                                         rxr->fmp->m_flags |= M_VLANTAG;
4174                                 }
4175 #if __FreeBSD_version >= 800000
4176                                 rxr->fmp->m_pkthdr.flowid = curcpu;
4177                                 rxr->fmp->m_flags |= M_FLOWID;
4178 #endif
4179                                 sendmp = rxr->fmp;
4180                                 rxr->fmp = NULL;
4181                                 rxr->lmp = NULL;
4182                         }
4183                 } else {
4184                         ifp->if_ierrors++;
4185 discard:
4186                         /* Reuse loaded DMA map and just update mbuf chain */
4187                         if (hlen) {
4188                                 mh = rxr->rx_buffers[i].m_head;
4189                                 mh->m_len = MHLEN;
4190                                 mh->m_next = NULL;
4191                         }
4192                         mp = rxr->rx_buffers[i].m_pack;
4193                         mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4194                         mp->m_data = mp->m_ext.ext_buf;
4195                         mp->m_next = NULL;
4196                         if (adapter->max_frame_size <=
4197                             (MCLBYTES - ETHER_ALIGN))
4198                                 m_adj(mp, ETHER_ALIGN);
4199                         if (rxr->fmp != NULL) {
4200                                 /* handles the whole chain */
4201                                 m_freem(rxr->fmp);
4202                                 rxr->fmp = NULL;
4203                                 rxr->lmp = NULL;
4204                         }
4205                         sendmp = NULL;
4206                 }
4207
4208                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4209                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4210
4211                 rxr->last_cleaned = i; /* For updating tail */
4212
4213                 /* Advance our pointers to the next descriptor. */
4214                 if (++i == adapter->num_rx_desc)
4215                         i = 0;
4216  
4217                 /*
4218                 ** Note that we hold the RX lock through
4219                 ** the following call, so this ring's
4220                 ** next_to_check will not change.
4221                 */
4222                 if (sendmp != NULL) {
4223                         /*
4224                         ** Send to the stack if:
4225                         **  - LRO not enabled, or
4226                         **  - no LRO resources, or
4227                         **  - lro enqueue fails
4228                         */
4229                         if ((!rxr->lro_enabled) ||
4230                             ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0))))
4231                                 (*ifp->if_input)(ifp, sendmp);
4232                 }
4233
4234                 /* Get the next descriptor */
4235                 cur = &rxr->rx_base[i];
4236                 staterr = cur->wb.upper.status_error;
4237         }
4238         rxr->next_to_check = i;
4239
4240         /* Advance the E1000's Receive Queue #0  "Tail Pointer". */
4241         E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4242
4243         /*
4244          * Flush any outstanding LRO work
4245          */
4246         while (!SLIST_EMPTY(&lro->lro_active)) {
4247                 queued = SLIST_FIRST(&lro->lro_active);
4248                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4249                 tcp_lro_flush(lro, queued);
4250         }
4251
4252         IGB_RX_UNLOCK(rxr);
4253
4254         /*
4255         ** Do we still have cleaning to do?
4256         ** If so, schedule another interrupt.
4257         */
4258         if (staterr & E1000_RXD_STAT_DD) {
4259                 E1000_WRITE_REG(&adapter->hw, E1000_EICS, rxr->eims);
4260                 return TRUE;
4261         }
4262
4263         return FALSE;
4264 }
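
/*
** The descriptor index arithmetic above, as a standalone sketch
** (illustration only, not driver code): the cleaner walks the ring
** modulo its size, and the hardware tail (RDT) trails at the last
** cleaned slot.
*/
#if 0
static inline u32
igb_ring_next(u32 i, u32 num_desc)
{
	return ((i + 1 == num_desc) ? 0 : i + 1);
}
#endif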
4265
4266
4267 /*********************************************************************
4268  *
4269  *  Verify that the hardware indicated that the checksum is valid.
4270  *  Inform the stack about the status of checksum so that stack
4271  *  doesn't spend time verifying the checksum.
4272  *
4273  *********************************************************************/
4274 static void
4275 igb_rx_checksum(u32 staterr, struct mbuf *mp, bool sctp)
4276 {
4277         u16 status = (u16)staterr;
4278         u8  errors = (u8) (staterr >> 24);
4279
4280         /* Ignore Checksum bit is set */
4281         if (status & E1000_RXD_STAT_IXSM) {
4282                 mp->m_pkthdr.csum_flags = 0;
4283                 return;
4284         }
4285
4286         if (status & E1000_RXD_STAT_IPCS) {
4287                 /* Did it pass? */
4288                 if (!(errors & E1000_RXD_ERR_IPE)) {
4289                         /* IP Checksum Good */
4290                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4291                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4292                 } else
4293                         mp->m_pkthdr.csum_flags = 0;
4294         }
4295
4296         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4297                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4298 #if __FreeBSD_version >= 800000
4299                 if (sctp) /* reassign */
4300                         type = CSUM_SCTP_VALID;
4301 #endif
4302                 /* Did it pass? */
4303                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4304                         mp->m_pkthdr.csum_flags |= type;
4305                         if (!sctp)
4306                                 mp->m_pkthdr.csum_data = htons(0xffff);
4307                 }
4308         }
4309         return;
4310 }
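
/*
** A minimal consumer-side sketch (illustration only) of how the stack
** interprets the flags set above: with CSUM_DATA_VALID|CSUM_PSEUDO_HDR
** and csum_data of 0xffff, the TCP/UDP input path can skip software
** checksum verification entirely.
*/
#if 0
static int
igb_rx_csum_ok(struct mbuf *m)
{
	int flags = m->m_pkthdr.csum_flags;

	if ((flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
		return (m->m_pkthdr.csum_data == 0xffff);
	return (0);	/* caller must verify in software */
}
#endif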
4311
4312 /*
4313  * This routine is run via a vlan
4314  * config EVENT
4315  */
4316 static void
4317 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4318 {
4319         struct adapter  *adapter = ifp->if_softc;
4320         u32             index, bit;
4321
4322         if (ifp->if_softc !=  arg)   /* Not our event */
4323                 return;
4324
4325         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4326                 return;
4327
4328         index = (vtag >> 5) & 0x7F;
4329         bit = vtag & 0x1F;
4330         igb_shadow_vfta[index] |= (1 << bit);
4331         ++adapter->num_vlans;
4332         /* Re-init to load the changes */
4333         igb_init(adapter);
4334 }
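
/*
** Worked example (illustration only) of the VFTA bitmap math above:
** the 4096 possible VLAN IDs map onto 128 32-bit words, so e.g.
** vtag 100 selects word 100 >> 5 = 3 and bit 100 & 0x1f = 4.
*/
#if 0
static void
igb_vfta_set_example(void)
{
	u16 vtag = 100;

	igb_shadow_vfta[(vtag >> 5) & 0x7F] |= (1 << (vtag & 0x1F));
}
#endif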
4335
4336 /*
4337  * This routine is run via a vlan
4338  * unconfig EVENT
4339  */
4340 static void
4341 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4342 {
4343         struct adapter  *adapter = ifp->if_softc;
4344         u32             index, bit;
4345
4346         if (ifp->if_softc !=  arg)
4347                 return;
4348
4349         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4350                 return;
4351
4352         index = (vtag >> 5) & 0x7F;
4353         bit = vtag & 0x1F;
4354         igb_shadow_vfta[index] &= ~(1 << bit);
4355         --adapter->num_vlans;
4356         /* Re-init to load the changes */
4357         igb_init(adapter);
4358 }
4359
4360 static void
4361 igb_setup_vlan_hw_support(struct adapter *adapter)
4362 {
4363         struct e1000_hw *hw = &adapter->hw;
4364         u32             reg;
4365
4366         /*
4367         ** We get here through init_locked, meaning
4368         ** a soft reset; that has already cleared
4369         ** the VFTA and other state, so if no vlans
4370         ** have been registered, do nothing.
4371         */
4372         if (adapter->num_vlans == 0)
4373                 return;
4374
4375         /*
4376         ** A soft reset zeroes out the VFTA, so
4377         ** we need to repopulate it now.
4378         */
4379         for (int i = 0; i < IGB_VFTA_SIZE; i++)
4380                 if (igb_shadow_vfta[i] != 0)
4381                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4382                             i, igb_shadow_vfta[i]);
4383
4384         reg = E1000_READ_REG(hw, E1000_CTRL);
4385         reg |= E1000_CTRL_VME;
4386         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4387
4388         /* Enable the Filter Table */
4389         reg = E1000_READ_REG(hw, E1000_RCTL);
4390         reg &= ~E1000_RCTL_CFIEN;
4391         reg |= E1000_RCTL_VFE;
4392         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4393
4394         /* Update the frame size */
4395         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4396             adapter->max_frame_size + VLAN_TAG_SIZE);
4397 }
4398
4399 static void
4400 igb_enable_intr(struct adapter *adapter)
4401 {
4402         /* With RSS set up what to auto clear */
4403         if (adapter->msix_mem) {
4404                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4405                     adapter->eims_mask);
4406                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4407                     adapter->eims_mask);
4408                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4409                     adapter->eims_mask);
4410                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4411                     E1000_IMS_LSC);
4412         } else {
4413                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4414                     IMS_ENABLE_MASK);
4415         }
4416         E1000_WRITE_FLUSH(&adapter->hw);
4417
4418         return;
4419 }
4420
4421 static void
4422 igb_disable_intr(struct adapter *adapter)
4423 {
4424         if (adapter->msix_mem) {
4425                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4426                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4427         } 
4428         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4429         E1000_WRITE_FLUSH(&adapter->hw);
4430         return;
4431 }
4432
4433 /*
4434  * A bit of a misnomer: what this really means is
4435  * to enable OS management of the system, i.e.
4436  * to disable special hardware management features.
4437  */
4438 static void
4439 igb_init_manageability(struct adapter *adapter)
4440 {
4441         if (adapter->has_manage) {
4442                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4443                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4444
4445                 /* disable hardware interception of ARP */
4446                 manc &= ~(E1000_MANC_ARP_EN);
4447
4448                 /* enable receiving management packets to the host */
4449                 manc |= E1000_MANC_EN_MNG2HOST;
4450                 manc2h |= 1 << 5;  /* Mng Port 623 */
4451                 manc2h |= 1 << 6;  /* Mng Port 664 */
4452                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4453                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4454         }
4455 }
4456
4457 /*
4458  * Give control back to hardware management
4459  * controller if there is one.
4460  */
4461 static void
4462 igb_release_manageability(struct adapter *adapter)
4463 {
4464         if (adapter->has_manage) {
4465                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4466
4467                 /* re-enable hardware interception of ARP */
4468                 manc |= E1000_MANC_ARP_EN;
4469                 manc &= ~E1000_MANC_EN_MNG2HOST;
4470
4471                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4472         }
4473 }
4474
4475 /*
4476  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4477  * For ASF and Pass Through versions of f/w this means that
4478  * the driver is loaded. 
4479  *
4480  */
4481 static void
4482 igb_get_hw_control(struct adapter *adapter)
4483 {
4484         u32 ctrl_ext;
4485
4486         /* Let firmware know the driver has taken over */
4487         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4488         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4489             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4490 }
4491
4492 /*
4493  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4494  * For ASF and Pass Through versions of f/w this means that the
4495  * driver is no longer loaded.
4496  *
4497  */
4498 static void
4499 igb_release_hw_control(struct adapter *adapter)
4500 {
4501         u32 ctrl_ext;
4502
4503         /* Let firmware take over control of h/w */
4504         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4505         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4506             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4507 }
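
/*
** The two routines above form a handshake with the management
** firmware; a sketch of the typical pairing (the actual call sites
** live in the attach/detach paths elsewhere in this file):
*/
#if 0
	igb_get_hw_control(adapter);	 /* attach/init: driver owns h/w */
	/* ... device operates under driver control ... */
	igb_release_hw_control(adapter); /* detach/shutdown: f/w resumes */
#endif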
4508
4509 static int
4510 igb_is_valid_ether_addr(uint8_t *addr)
4511 {
4512         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4513
4514         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4515                 return (FALSE);
4516         }
4517
4518         return (TRUE);
4519 }
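
/*
** Illustrative caller (a sketch, not necessarily how this driver's
** attach path is written): reject a multicast or all-zero address
** read from the EEPROM before bringing up the interface.
*/
#if 0
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		return (EIO);
	}
#endif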
4520
4521
4522 /*
4523  * Enable PCI Wake On Lan capability
4524  */
4525 void
4526 igb_enable_wakeup(device_t dev)
4527 {
4528         u16     cap, status;
4529         u8      id;
4530
4531         /* First find the capabilities pointer */
4532         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4533         /* Read the PM Capabilities */
4534         id = pci_read_config(dev, cap, 1);
4535         if (id != PCIY_PMG)     /* Something wrong */
4536                 return;
4537         /* OK, we have the power capabilities, so
4538            now get the status register */
4539         cap += PCIR_POWER_STATUS;
4540         status = pci_read_config(dev, cap, 2);
4541         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4542         pci_write_config(dev, cap, status, 2);
4543         return;
4544 }
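
/*
** The routine above assumes the power-management capability is first
** in the capability list. A more defensive sketch, assuming the
** standard pci_find_extcap(9) helper of this era, walks the whole
** chain instead:
*/
#if 0
static void
igb_enable_wakeup_walk(device_t dev)
{
	int	cap;
	u16	status;

	if (pci_find_extcap(dev, PCIY_PMG, &cap) != 0)
		return;		/* no PM capability found */
	status = pci_read_config(dev, cap + PCIR_POWER_STATUS, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, cap + PCIR_POWER_STATUS, status, 2);
}
#endif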
4545
4546
4547 /**********************************************************************
4548  *
4549  *  Update the board statistics counters.
4550  *
4551  **********************************************************************/
4552 static void
4553 igb_update_stats_counters(struct adapter *adapter)
4554 {
4555         struct ifnet   *ifp;
4556
4557         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4558            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4559                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4560                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4561         }
4562         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4563         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4564         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4565         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4566
4567         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4568         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4569         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4570         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4571         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4572         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4573         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4574         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4575         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4576         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4577         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4578         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4579         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4580         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4581         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4582         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4583         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4584         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4585         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4586         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4587
4588         /* For the 64-bit byte counters the low dword must be read first. */
4589         /* Both registers clear on the read of the high dword */
4590
4591         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4592         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4593
4594         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4595         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4596         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4597         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4598         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4599
4600         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4601         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4602
4603         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4604         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4605         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4606         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4607         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4608         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4609         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4610         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4611         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4612         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4613
4614         adapter->stats.algnerrc += 
4615                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4616         adapter->stats.rxerrc += 
4617                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4618         adapter->stats.tncrs += 
4619                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4620         adapter->stats.cexterr += 
4621                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4622         adapter->stats.tsctc += 
4623                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4624         adapter->stats.tsctfc += 
4625                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4626         ifp = adapter->ifp;
4627
4628         ifp->if_collisions = adapter->stats.colc;
4629
4630         /* Rx Errors */
4631         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4632             adapter->stats.crcerrs + adapter->stats.algnerrc +
4633             adapter->stats.ruc + adapter->stats.roc +
4634             adapter->stats.mpc + adapter->stats.cexterr;
4635
4636         /* Tx Errors */
4637         ifp->if_oerrors = adapter->stats.ecol +
4638             adapter->stats.latecol + adapter->watchdog_events;
4639 }
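
/*
** The 64-bit counter reads above follow the hardware rule noted in
** the comments: read the low dword first, then the high dword (which
** clears both). A small helper sketch (illustration only) of that
** pattern:
*/
#if 0
static u64
igb_read_stat64(struct e1000_hw *hw, u32 lo_reg, u32 hi_reg)
{
	u64 val;

	val = E1000_READ_REG(hw, lo_reg);		/* low dword first */
	val |= (u64)E1000_READ_REG(hw, hi_reg) << 32;	/* clears both */
	return (val);
}
#endif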
4640
4641
4642 /**********************************************************************
4643  *
4644  *  This routine is called only when igb_display_debug_stats is enabled.
4645  *  This routine provides a way to take a look at important statistics
4646  *  maintained by the driver and hardware.
4647  *
4648  **********************************************************************/
4649 static void
4650 igb_print_debug_info(struct adapter *adapter)
4651 {
4652         device_t dev = adapter->dev;
4653         struct rx_ring *rxr = adapter->rx_rings;
4654         struct tx_ring *txr = adapter->tx_rings;
4655         uint8_t *hw_addr = adapter->hw.hw_addr;
4656
4657         device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4658         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4659             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4660             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4661
4662 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4663         device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4664             E1000_READ_REG(&adapter->hw, E1000_IMS),
4665             E1000_READ_REG(&adapter->hw, E1000_EIMS));
4666 #endif
4667
4668         device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4669             ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4670             (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4671         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4672             adapter->hw.fc.high_water,
4673             adapter->hw.fc.low_water);
4674
4675         for (int i = 0; i < adapter->num_queues; i++, txr++) {
4676                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4677                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4678                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4679                 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4680                     txr->me, (long long)txr->no_desc_avail);
4681                 device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4682                     (long long)txr->tx_irq);
4683                 device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4684                     (long long)txr->tx_packets);
4685         }
4686
4687         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4688                 struct lro_ctrl *lro = &rxr->lro;
4689                 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4690                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4691                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4692                 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4693                     (long long)rxr->rx_packets);
4694                 device_printf(dev, "RX(%d) Split Packets = %lld\n", rxr->me,
4695                     (long long)rxr->rx_split_packets);
4696                 device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4697                     (long long)rxr->rx_bytes);
4698                 device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4699                     (long long)rxr->rx_irq);
4700                 device_printf(dev,"RX(%d) LRO Queued= %d\n",
4701                     rxr->me, lro->lro_queued);
4702                 device_printf(dev,"RX(%d) LRO Flushed= %d\n",
4703                     rxr->me, lro->lro_flushed);
4704         }
4705
4706         device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4707
4708         device_printf(dev, "Mbuf defrag failed = %ld\n",
4709             adapter->mbuf_defrag_failed);
4710         device_printf(dev, "Std mbuf header failed = %ld\n",
4711             adapter->mbuf_header_failed);
4712         device_printf(dev, "Std mbuf packet failed = %ld\n",
4713             adapter->mbuf_packet_failed);
4714         device_printf(dev, "Driver dropped packets = %ld\n",
4715             adapter->dropped_pkts);
4716         device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4717                 adapter->no_tx_dma_setup);
4718 }
4719
4720 static void
4721 igb_print_hw_stats(struct adapter *adapter)
4722 {
4723         device_t dev = adapter->dev;
4724
4725         device_printf(dev, "Excessive collisions = %lld\n",
4726             (long long)adapter->stats.ecol);
4727 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4728         device_printf(dev, "Symbol errors = %lld\n",
4729             (long long)adapter->stats.symerrs);
4730 #endif
4731         device_printf(dev, "Sequence errors = %lld\n",
4732             (long long)adapter->stats.sec);
4733         device_printf(dev, "Defer count = %lld\n",
4734             (long long)adapter->stats.dc);
4735         device_printf(dev, "Missed Packets = %lld\n",
4736             (long long)adapter->stats.mpc);
4737         device_printf(dev, "Receive No Buffers = %lld\n",
4738             (long long)adapter->stats.rnbc);
4739         /* RLEC is inaccurate on some hardware, so calculate our own. */
4740         device_printf(dev, "Receive Length Errors = %lld\n",
4741             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4742         device_printf(dev, "Receive errors = %lld\n",
4743             (long long)adapter->stats.rxerrc);
4744         device_printf(dev, "Crc errors = %lld\n",
4745             (long long)adapter->stats.crcerrs);
4746         device_printf(dev, "Alignment errors = %lld\n",
4747             (long long)adapter->stats.algnerrc);
4748         /* On 82575 these are collision counts */
4749         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4750             (long long)adapter->stats.cexterr);
4751         device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4752         device_printf(dev, "watchdog timeouts = %ld\n",
4753             adapter->watchdog_events);
4754         device_printf(dev, "XON Rcvd = %lld\n",
4755             (long long)adapter->stats.xonrxc);
4756         device_printf(dev, "XON Xmtd = %lld\n",
4757             (long long)adapter->stats.xontxc);
4758         device_printf(dev, "XOFF Rcvd = %lld\n",
4759             (long long)adapter->stats.xoffrxc);
4760         device_printf(dev, "XOFF Xmtd = %lld\n",
4761             (long long)adapter->stats.xofftxc);
4762         device_printf(dev, "Good Packets Rcvd = %lld\n",
4763             (long long)adapter->stats.gprc);
4764         device_printf(dev, "Good Packets Xmtd = %lld\n",
4765             (long long)adapter->stats.gptc);
4766         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4767             (long long)adapter->stats.tsctc);
4768         device_printf(dev, "TSO Contexts Failed = %lld\n",
4769             (long long)adapter->stats.tsctfc);
4770 }
4771
4772 /**********************************************************************
4773  *
4774  *  This routine provides a way to dump out the adapter eeprom,
4775  *  often a useful debug/service tool. This only dumps the first
4776  *  32 words, stuff that matters is in that extent.
4777  *
4778  **********************************************************************/
4779 static void
4780 igb_print_nvm_info(struct adapter *adapter)
4781 {
4782         u16     eeprom_data;
4783         int     i, j, row = 0;
4784
4785         /* It's a bit crude, but it gets the job done */
4786         printf("\nInterface EEPROM Dump:\n");
4787         printf("Offset\n0x0000  ");
4788         for (i = 0, j = 0; i < 32; i++, j++) {
4789                 if (j == 8) { /* Make the offset block */
4790                         j = 0; ++row;
4791                         printf("\n0x00%x0  ", row);
4792                 }
4793                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4794                 printf("%04x ", eeprom_data);
4795         }
4796         printf("\n");
4797 }
4798
4799 static int
4800 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4801 {
4802         struct adapter *adapter;
4803         int error;
4804         int result;
4805
4806         result = -1;
4807         error = sysctl_handle_int(oidp, &result, 0, req);
4808
4809         if (error || !req->newptr)
4810                 return (error);
4811
4812         if (result == 1) {
4813                 adapter = (struct adapter *)arg1;
4814                 igb_print_debug_info(adapter);
4815         }
4816         /*
4817          * This value will cause a hex dump of the
4818          * first 32 16-bit words of the EEPROM to
4819          * the screen.
4820          */
4821         if (result == 2) {
4822                 adapter = (struct adapter *)arg1;
4823                 igb_print_nvm_info(adapter);
4824         }
4825
4826         return (error);
4827 }
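
/*
** Usage sketch: assuming this handler is attached under an OID named
** "debug" (the exact name is defined where the sysctl is added at
** attach time), writing to it triggers the dumps:
**
**	sysctl dev.igb.0.debug=1	# driver/queue debug info
**	sysctl dev.igb.0.debug=2	# first 32 EEPROM words
*/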
4828
4829
4830 static int
4831 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4832 {
4833         struct adapter *adapter;
4834         int error;
4835         int result;
4836
4837         result = -1;
4838         error = sysctl_handle_int(oidp, &result, 0, req);
4839
4840         if (error || !req->newptr)
4841                 return (error);
4842
4843         if (result == 1) {
4844                 adapter = (struct adapter *)arg1;
4845                 igb_print_hw_stats(adapter);
4846         }
4847
4848         return (error);
4849 }
4850
4851 static void
4852 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4853         const char *description, int *limit, int value)
4854 {
4855         *limit = value;
4856         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4857             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4858             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4859 }
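
/*
** Typical invocation (a sketch; the name, description, and default
** shown here are illustrative): called once at attach time to expose
** a tunable rx cleanup budget.
*/
#if 0
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, 100);
#endif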
4860
4861 #ifdef IGB_IEEE1588
4862 /*
4863 ** igb_hwtstamp_ioctl - control hardware time stamping
4864 **
4865 ** Outgoing time stamping can be enabled and disabled. Play nice and
4866 ** disable it when requested, although it shouldn't cause any overhead
4867 ** when no packet needs it. At most one packet in the queue may be
4868 ** marked for time stamping, otherwise it would be impossible to tell
4869 ** for sure to which packet the hardware time stamp belongs.
4870 **
4871 ** Incoming time stamping has to be configured via the hardware
4872 ** filters. Not all combinations are supported; in particular, the
4873 ** event type has to be specified. Matching the kind of event packet is
4874 ** not supported, with the exception of "all V2 events regardless of
4875 ** level 2 or 4".
4876 **
4877 */
4878 static int
4879 igb_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr)
4880 {
4881         struct e1000_hw *hw = &adapter->hw;
4882         struct hwtstamp_ctrl *config;
4883         u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4884         u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
4885         u32 tsync_rx_ctl_type = 0;
4886         u32 tsync_rx_cfg = 0;
4887         int is_l4 = 0;
4888         int is_l2 = 0;
4889         u16 port = 319; /* PTP */
4890         u32 regval;
4891
4892         config = (struct hwtstamp_ctrl *) ifr->ifr_data;
4893
4894         /* reserved for future extensions */
4895         if (config->flags)
4896                 return (EINVAL);
4897
4898         switch (config->tx_type) {
4899         case HWTSTAMP_TX_OFF:
4900                 tsync_tx_ctl_bit = 0;
4901                 break;
4902         case HWTSTAMP_TX_ON:
4903                 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4904                 break;
4905         default:
4906                 return (ERANGE);
4907         }
4908
4909         switch (config->rx_filter) {
4910         case HWTSTAMP_FILTER_NONE:
4911                 tsync_rx_ctl_bit = 0;
4912                 break;
4913         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
4914         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
4915         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
4916         case HWTSTAMP_FILTER_ALL:
4917                 /*
4918                  * register TSYNCRXCFG must be set, therefore it is not
4919                  * possible to time stamp both Sync and Delay_Req messages
4920                  * => fall back to time stamping all packets
4921                  */
4922                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
4923                 config->rx_filter = HWTSTAMP_FILTER_ALL;
4924                 break;
4925         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
4926                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4927                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
4928                 is_l4 = 1;
4929                 break;
4930         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
4931                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4932                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
4933                 is_l4 = 1;
4934                 break;
4935         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
4936         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
4937                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4938                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
4939                 is_l2 = 1;
4940                 is_l4 = 1;
4941                 config->rx_filter = HWTSTAMP_FILTER_SOME;
4942                 break;
4943         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
4944         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
4945                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4946                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
4947                 is_l2 = 1;
4948                 is_l4 = 1;
4949                 config->rx_filter = HWTSTAMP_FILTER_SOME;
4950                 break;
4951         case HWTSTAMP_FILTER_PTP_V2_EVENT:
4952         case HWTSTAMP_FILTER_PTP_V2_SYNC:
4953         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
4954                 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
4955                 config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
4956                 is_l2 = 1;
4957                 is_l2 = 1;
                break;
4958         default:
4959                 return (ERANGE);
4960         }
4961
4962         /* enable/disable TX */
4963         regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
4964         regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
4965         E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
4966
4967         /* enable/disable RX, define which PTP packets are time stamped */
4968         regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
4969         regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
4970         regval = (regval & ~0xE) | tsync_rx_ctl_type;
4971         E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
4972         E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
4973
4974         /*
4975          * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
4976          *                                          (Ethertype to filter on)
4977          * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4978          * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
4979          */
4980         E1000_WRITE_REG(hw, E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
4981
4982         /* L4 Queue Filter[0]: only filter by source and destination port */
4983         E1000_WRITE_REG(hw, E1000_SPQF0, htons(port));
4984         E1000_WRITE_REG(hw, E1000_IMIREXT(0), is_l4 ?
4985              ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
4986         E1000_WRITE_REG(hw, E1000_IMIR(0), is_l4 ?
4987              (htons(port)
4988               | (0<<16) /* immediate interrupt disabled */
4989               | 0 /* (1<<17) bit cleared: do not bypass
4990                      destination port check */)
4991                 : 0);
4992         E1000_WRITE_REG(hw, E1000_FTQF0, is_l4 ?
4993              (0x11 /* UDP */
4994               | (1<<15) /* VF not compared */
4995               | (1<<27) /* Enable Timestamping */
4996               | (7<<28) /* only source port filter enabled,
4997                            source/target address and protocol
4998                            masked */)
4999              : ((1<<15) | (15<<28) /* all mask bits set = filter not
5000                                       enabled */));
5001
5002         wrfl();
5003
5004         adapter->hwtstamp_ctrl = config;
5005
5006         /* clear TX/RX time stamp registers, just to be sure */
5007         regval = E1000_READ_REG(hw, E1000_TXSTMPH);
5008         regval = E1000_READ_REG(hw, E1000_RXSTMPH);
5009
5010         return (0);
5011 }
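
/*
** Caller-side sketch (illustration only) of driving the ioctl above:
** fill in a hwtstamp_ctrl, point ifr_data at it, and request TX
** stamping plus all-packet RX stamping. The config is static because
** the ioctl retains the pointer in adapter->hwtstamp_ctrl.
*/
#if 0
static int
igb_hwtstamp_example(struct adapter *adapter, struct ifreq *ifr)
{
	static struct hwtstamp_ctrl config;

	memset(&config, 0, sizeof(config));
	config.flags = 0;			/* must be zero */
	config.tx_type = HWTSTAMP_TX_ON;
	config.rx_filter = HWTSTAMP_FILTER_ALL;
	ifr->ifr_data = (caddr_t)&config;
	return (igb_hwtstamp_ioctl(adapter, ifr));
}
#endif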
5012
5013 /*
5014 ** igb_read_clock - read raw cycle counter (to be used by time counter)
5015 */
5016 static cycle_t
igb_read_clock(const struct cyclecounter *tc)
5017 {
5018        struct adapter *adapter =
5019                container_of(tc, struct adapter, cycles);
5020        struct e1000_hw *hw = &adapter->hw;
5021        u64 stamp;
5022
5023        stamp =  E1000_READ_REG(hw, E1000_SYSTIML);
5024        stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << 32ULL;
5025
5026        return (stamp);
5027 }
5028
5029 #endif /* IGB_IEEE1588 */