/******************************************************************************

  Copyright (c) 2001-2012, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.3.4";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#if __FreeBSD_version >= 800000
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static bool     igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, struct tcphdr *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        {0, 0}
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
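
/*
 * Illustrative note (not part of the original driver): because rxd/txd
 * are CTLFLAG_RDTUN tunables they are read once at driver load, so they
 * would typically be set from /boot/loader.conf, e.g. (values hypothetical):
 *
 *   hw.igb.rxd=2048
 *   hw.igb.txd=2048
 *
 * Requested values are bounded by IGB_MIN_RXD/IGB_MAX_RXD (and the TX
 * equivalents), as checked in igb_attach() below.
 */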

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
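
/*
 * Illustrative note (not part of the original driver): enable_aim is
 * CTLFLAG_RW, so unlike the read-only tunables above it can be toggled
 * on a running system, e.g.:
 *
 *   # sysctl hw.igb.enable_aim=0
 */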

/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf and thus use no cluster.  It is a very
** workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store the last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments
** when a queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on
 *  an adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}
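
/*
 * Illustrative walkthrough (not part of the original driver): every row
 * of igb_vendor_info_array above uses PCI_ANY_ID for the subvendor and
 * subdevice fields, so a device matches as soon as its vendor ID is
 * 0x8086 and its device ID equals one of the table entries, e.g.
 * E1000_DEV_ID_I350_COPPER; the subsystem IDs act as wildcards here.
 */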

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  It
         * must not exceed the hardware maximum, and must be a multiple
         * of E1000_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;
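
        /*
         * Worked example (illustrative; the constants assume their usual
         * values from the e1000 headers, sizeof(struct e1000_tx_desc) == 16
         * and IGB_DBA_ALIGN == 128): with hw.igb.txd=1024 the ring occupies
         * 1024 * 16 = 16384 bytes, and 16384 % 128 == 0, so the value passes
         * the alignment test.  Values outside [IGB_MIN_TXD, IGB_MAX_TXD]
         * fall back to IGB_DEFAULT_TXD.
         */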

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard Ethernet-sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
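
        /*
         * Worked arithmetic (illustrative): with the standard constants
         * ETHERMTU = 1500, ETHER_HDR_LEN = 14 and a 4-byte FCS,
         * max_frame_size comes out to 1500 + 14 + 4 = 1518 bytes, the
         * classic maximum untagged Ethernet frame size on the wire.
         */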

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats =
                    (struct e1000_vf_stats *)malloc(sizeof \
                    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats =
                    (struct e1000_hw_stats *)malloc(sizeof \
                    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper)
                        e1000_set_eee_i350(&adapter->hw);
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-LAN
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#if __FreeBSD_version >= 800000
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#if __FreeBSD_version < 800000

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack; always uses tx[0], and spins for the lock.
 * Should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* __FreeBSD_version >= 800000 */

/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
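        /*
         * Illustrative example (not part of the original driver): with
         * num_queues == 4, a flow-tagged packet whose flowid is 0x1234
         * (4660) maps to ring 4660 % 4 == 0, and every packet of that
         * flow hashes to the same ring, preserving per-flow ordering.
         * Untagged traffic falls back to the sending CPU's ring.
         */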

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];
        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
            IGB_TX_TRYLOCK(txr)) {
                err = igb_mq_start_locked(ifp, txr, m);
                IGB_TX_UNLOCK(txr);
        } else {
                err = drbr_enqueue(ifp, txr->br, m);
                taskqueue_enqueue(que->tq, &txr->txq_task);
        }

        return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            (txr->queue_status == IGB_QUEUE_DEPLETED) ||
            adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                next = drbr_dequeue(ifp, txr->br);
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr, NULL);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
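        /*
         * Worked arithmetic (illustrative): with max_frame_size = 9234,
         * ETHER_HDR_LEN = 14 and ETHER_CRC_LEN = 4, the largest MTU this
         * check accepts is 9234 - 14 - 4 = 9216 bytes; anything larger
         * returns EINVAL.
         */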
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }
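        /*
         * Illustrative example (not part of the original driver): the XOR
         * above sets a bit in "mask" only where the requested capabilities
         * differ from the current ones.  E.g. if IFCAP_TSO4 is currently
         * enabled and ifr_reqcap clears it, (mask & IFCAP_TSO4) is nonzero,
         * the ^= toggles the bit off, and reinit forces igb_init() so the
         * hardware state matches the new capability set.
         */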

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest MAC address; the user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
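        /*
         * Illustrative mapping (the constants assume their usual FreeBSD
         * values, MCLBYTES = 2048, MJUMPAGESIZE = PAGE_SIZE, 4096 on most
         * platforms, and MJUM9BYTES = 9216): a standard 1518-byte frame
         * gets 2 KB clusters, a 4000-byte jumbo frame gets page-sized
         * clusters, and a 9018-byte jumbo frame gets 9 KB clusters.
         */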

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                e1000_set_eee_i350(&adapter->hw);
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
        struct igb_queue *que = context;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                bool    more;

                more = igb_rxeof(que, adapter->rx_process_limit, NULL);

                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#if __FreeBSD_version >= 800000
                /* Process the stack queue only if not depleted */
                if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                    !drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr, NULL);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
                /* Do we need another? */
                if (more) {
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                return;
#endif
        /* Reenable this interrupt */
        if (que->eims)
                E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
        else
                igb_enable_intr(adapter);
}
1414
1415 /* Deal with link in a sleepable context */
1416 static void
1417 igb_handle_link(void *context, int pending)
1418 {
1419         struct adapter *adapter = context;
1420
1421         IGB_CORE_LOCK(adapter);
1422         igb_handle_link_locked(adapter);
1423         IGB_CORE_UNLOCK(adapter);
1424 }
1425
1426 static void
1427 igb_handle_link_locked(struct adapter *adapter)
1428 {
1429         struct tx_ring  *txr = adapter->tx_rings;
1430         struct ifnet *ifp = adapter->ifp;
1431
1432         IGB_CORE_LOCK_ASSERT(adapter);
1433         adapter->hw.mac.get_link_status = 1;
1434         igb_update_link_status(adapter);
1435         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1436                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1437                         IGB_TX_LOCK(txr);
1438 #if __FreeBSD_version >= 800000
1439                         /* Process the stack queue only if not depleted */
1440                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1441                             !drbr_empty(ifp, txr->br))
1442                                 igb_mq_start_locked(ifp, txr, NULL);
1443 #else
1444                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1445                                 igb_start_locked(txr, ifp);
1446 #endif
1447                         IGB_TX_UNLOCK(txr);
1448                 }
1449         }
1450 }
1451
1452 /*********************************************************************
1453  *
1454  *  MSI/Legacy Deferred
1455  *  Interrupt Service routine  
1456  *
1457  *********************************************************************/
1458 static int
1459 igb_irq_fast(void *arg)
1460 {
1461         struct adapter          *adapter = arg;
1462         struct igb_queue        *que = adapter->queues;
1463         u32                     reg_icr;
1464
1465
1466         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1467
1468         /* Hot eject?  */
1469         if (reg_icr == 0xffffffff)
1470                 return FILTER_STRAY;
1471
1472         /* Definitely not our interrupt.  */
1473         if (reg_icr == 0x0)
1474                 return FILTER_STRAY;
1475
1476         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1477                 return FILTER_STRAY;
1478
1479         /*
1480          * Mask interrupts until the taskqueue is finished running.  This is
1481          * cheap, just assume that it is needed.  This also works around the
1482          * MSI message reordering errata on certain systems.
1483          */
1484         igb_disable_intr(adapter);
1485         taskqueue_enqueue(que->tq, &que->que_task);
1486
1487         /* Link status change */
1488         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1489                 taskqueue_enqueue(que->tq, &adapter->link_task);
1490
1491         if (reg_icr & E1000_ICR_RXO)
1492                 adapter->rx_overruns++;
1493         return FILTER_HANDLED;
1494 }
1495
1496 #ifdef DEVICE_POLLING
1497 /*********************************************************************
1498  *
1499  *  Legacy polling routine: if you use this code you MUST be sure
1500  *  that multiqueue is not enabled, i.e., set igb_num_queues to 1.
1501  *
1502  *********************************************************************/
1503 #if __FreeBSD_version >= 800000
1504 #define POLL_RETURN_COUNT(a) (a)
1505 static int
1506 #else
1507 #define POLL_RETURN_COUNT(a)
1508 static void
1509 #endif
1510 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1511 {
1512         struct adapter          *adapter = ifp->if_softc;
1513         struct igb_queue        *que = adapter->queues;
1514         struct tx_ring          *txr = adapter->tx_rings;
1515         u32                     reg_icr, rx_done = 0;
1516         u32                     loop = IGB_MAX_LOOP;
1517         bool                    more;
1518
1519         IGB_CORE_LOCK(adapter);
1520         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1521                 IGB_CORE_UNLOCK(adapter);
1522                 return POLL_RETURN_COUNT(rx_done);
1523         }
1524
1525         if (cmd == POLL_AND_CHECK_STATUS) {
1526                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1527                 /* Link status change */
1528                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1529                         igb_handle_link_locked(adapter);
1530
1531                 if (reg_icr & E1000_ICR_RXO)
1532                         adapter->rx_overruns++;
1533         }
1534         IGB_CORE_UNLOCK(adapter);
1535
1536         igb_rxeof(que, count, &rx_done);
1537
1538         IGB_TX_LOCK(txr);
1539         do {
1540                 more = igb_txeof(txr);
1541         } while (loop-- && more);
1542 #if __FreeBSD_version >= 800000
1543         if (!drbr_empty(ifp, txr->br))
1544                 igb_mq_start_locked(ifp, txr, NULL);
1545 #else
1546         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1547                 igb_start_locked(txr, ifp);
1548 #endif
1549         IGB_TX_UNLOCK(txr);
1550         return POLL_RETURN_COUNT(rx_done);
1551 }
1552 #endif /* DEVICE_POLLING */
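     /*
      * Usage sketch (assumptions, not taken from this file): the legacy
      * polling path typically needs a kernel built with
      * "options DEVICE_POLLING", a single queue forced via the
      * hw.igb.num_queues loader tunable, and polling switched on per
      * interface, e.g.:
      *
      *      # echo 'hw.igb.num_queues=1' >> /boot/loader.conf
      *      # ifconfig igb0 polling
      */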
1553
1554 /*********************************************************************
1555  *
1556  *  MSIX Que Interrupt Service routine
1557  *
1558  **********************************************************************/
1559 static void
1560 igb_msix_que(void *arg)
1561 {
1562         struct igb_queue *que = arg;
1563         struct adapter *adapter = que->adapter;
1564         struct ifnet   *ifp = adapter->ifp;
1565         struct tx_ring *txr = que->txr;
1566         struct rx_ring *rxr = que->rxr;
1567         u32             newitr = 0;
1568         bool            more_rx;
1569
1570         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1571         ++que->irqs;
1572
1573         IGB_TX_LOCK(txr);
1574         igb_txeof(txr);
1575 #if __FreeBSD_version >= 800000
1576         /* Process the stack queue only if not depleted */
1577         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1578             !drbr_empty(ifp, txr->br))
1579                 igb_mq_start_locked(ifp, txr, NULL);
1580 #else
1581         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1582                 igb_start_locked(txr, ifp);
1583 #endif
1584         IGB_TX_UNLOCK(txr);
1585
1586         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1587
1588         if (adapter->enable_aim == FALSE)
1589                 goto no_calc;
1590         /*
1591         ** Do Adaptive Interrupt Moderation:
1592         **  - Write out last calculated setting
1593         **  - Calculate based on average size over
1594         **    the last interval.
1595         */
1596         if (que->eitr_setting)
1597                 E1000_WRITE_REG(&adapter->hw,
1598                     E1000_EITR(que->msix), que->eitr_setting);
1599  
1600         que->eitr_setting = 0;
1601
1602         /* Idle, do nothing */
1603         if ((txr->bytes == 0) && (rxr->bytes == 0))
1604                 goto no_calc;
1605                                 
1606         /* Use half the default if sub-gig */
1607         if (adapter->link_speed != 1000)
1608                 newitr = IGB_DEFAULT_ITR / 2;
1609         else {
1610                 if ((txr->bytes) && (txr->packets))
1611                         newitr = txr->bytes/txr->packets;
1612                 if ((rxr->bytes) && (rxr->packets))
1613                         newitr = max(newitr,
1614                             (rxr->bytes / rxr->packets));
1615                 newitr += 24; /* account for hardware frame, crc */
1616                 /* set an upper boundary */
1617                 newitr = min(newitr, 3000);
1618                 /* Be nice to the mid range */
1619                 if ((newitr > 300) && (newitr < 1200))
1620                         newitr = (newitr / 3);
1621                 else
1622                         newitr = (newitr / 2);
1623         }
1624         newitr &= 0x7FFC;  /* Mask invalid bits */
1625         if (adapter->hw.mac.type == e1000_82575)
1626                 newitr |= newitr << 16;
1627         else
1628                 newitr |= E1000_EITR_CNT_IGNR;
1629                  
1630         /* save for next interrupt */
1631         que->eitr_setting = newitr;
1632
1633         /* Reset state */
1634         txr->bytes = 0;
1635         txr->packets = 0;
1636         rxr->bytes = 0;
1637         rxr->packets = 0;
1638
1639 no_calc:
1640         /* Schedule a clean task if needed */
1641         if (more_rx)
1642                 taskqueue_enqueue(que->tq, &que->que_task);
1643         else
1644                 /* Reenable this interrupt */
1645                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1646         return;
1647 }
1648
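     /*
      * A minimal sketch of the AIM math in igb_msix_que() above, pulled
      * out as a standalone helper for illustration only; the function
      * name and parameters are hypothetical, not driver API.
      */
     #if 0
     static u32
     aim_example(u32 tx_bytes, u32 tx_pkts, u32 rx_bytes, u32 rx_pkts)
     {
             u32 newitr = 0;

             if (tx_bytes && tx_pkts)
                     newitr = tx_bytes / tx_pkts;    /* average TX size */
             if (rx_bytes && rx_pkts)                /* max of TX/RX avg */
                     newitr = max(newitr, rx_bytes / rx_pkts);
             newitr += 24;                   /* hardware framing + CRC */
             newitr = min(newitr, 3000);     /* upper boundary */
             if (newitr > 300 && newitr < 1200)
                     newitr /= 3;            /* be nice to the mid range */
             else
                     newitr /= 2;
             return (newitr & 0x7FFC);       /* mask invalid bits */
     }
     /* e.g. aim_example(64000, 64, 12000, 8) == 760 */
     #endif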
1649
1650 /*********************************************************************
1651  *
1652  *  MSIX Link Interrupt Service routine
1653  *
1654  **********************************************************************/
1655
1656 static void
1657 igb_msix_link(void *arg)
1658 {
1659         struct adapter  *adapter = arg;
1660         u32             icr;
1661
1662         ++adapter->link_irq;
1663         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1664         if (!(icr & E1000_ICR_LSC))
1665                 goto spurious;
1666         igb_handle_link(adapter, 0);
1667
1668 spurious:
1669         /* Rearm */
1670         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1671         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1672         return;
1673 }
1674
1675
1676 /*********************************************************************
1677  *
1678  *  Media Ioctl callback
1679  *
1680  *  This routine is called whenever the user queries the status of
1681  *  the interface using ifconfig.
1682  *
1683  **********************************************************************/
1684 static void
1685 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1686 {
1687         struct adapter *adapter = ifp->if_softc;
1688         u_char fiber_type = IFM_1000_SX;
1689
1690         INIT_DEBUGOUT("igb_media_status: begin");
1691
1692         IGB_CORE_LOCK(adapter);
1693         igb_update_link_status(adapter);
1694
1695         ifmr->ifm_status = IFM_AVALID;
1696         ifmr->ifm_active = IFM_ETHER;
1697
1698         if (!adapter->link_active) {
1699                 IGB_CORE_UNLOCK(adapter);
1700                 return;
1701         }
1702
1703         ifmr->ifm_status |= IFM_ACTIVE;
1704
1705         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1706             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1707                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1708         else {
1709                 switch (adapter->link_speed) {
1710                 case 10:
1711                         ifmr->ifm_active |= IFM_10_T;
1712                         break;
1713                 case 100:
1714                         ifmr->ifm_active |= IFM_100_TX;
1715                         break;
1716                 case 1000:
1717                         ifmr->ifm_active |= IFM_1000_T;
1718                         break;
1719                 }
1720                 if (adapter->link_duplex == FULL_DUPLEX)
1721                         ifmr->ifm_active |= IFM_FDX;
1722                 else
1723                         ifmr->ifm_active |= IFM_HDX;
1724         }
1725         IGB_CORE_UNLOCK(adapter);
1726 }
1727
1728 /*********************************************************************
1729  *
1730  *  Media Ioctl callback
1731  *
1732  *  This routine is called when the user changes speed/duplex using
1733  *  media/mediaopt options with ifconfig.
1734  *
1735  **********************************************************************/
1736 static int
1737 igb_media_change(struct ifnet *ifp)
1738 {
1739         struct adapter *adapter = ifp->if_softc;
1740         struct ifmedia  *ifm = &adapter->media;
1741
1742         INIT_DEBUGOUT("igb_media_change: begin");
1743
1744         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1745                 return (EINVAL);
1746
1747         IGB_CORE_LOCK(adapter);
1748         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1749         case IFM_AUTO:
1750                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1751                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1752                 break;
1753         case IFM_1000_LX:
1754         case IFM_1000_SX:
1755         case IFM_1000_T:
1756                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1757                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1758                 break;
1759         case IFM_100_TX:
1760                 adapter->hw.mac.autoneg = FALSE;
1761                 adapter->hw.phy.autoneg_advertised = 0;
1762                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1763                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1764                 else
1765                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1766                 break;
1767         case IFM_10_T:
1768                 adapter->hw.mac.autoneg = FALSE;
1769                 adapter->hw.phy.autoneg_advertised = 0;
1770                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1772                 else
1773                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1774                 break;
1775         default:
1776                 device_printf(adapter->dev, "Unsupported media type\n");
1777         }
1778
1779         igb_init_locked(adapter);
1780         IGB_CORE_UNLOCK(adapter);
1781
1782         return (0);
1783 }
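     /*
      * Illustrative ifconfig invocations that reach this callback
      * (assumed typical usage, not from this file):
      *
      *      # ifconfig igb0 media autoselect
      *      # ifconfig igb0 media 100baseTX mediaopt full-duplex
      *
      * The first takes the IFM_AUTO case above; the second forces
      * 100/full via the IFM_100_TX case.
      */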
1784
1785
1786 /*********************************************************************
1787  *
1788  *  This routine maps the mbufs to Advanced TX descriptors.
1789  *  
1790  **********************************************************************/
1791 static int
1792 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1793 {
1794         struct adapter          *adapter = txr->adapter;
1795         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1796         bus_dmamap_t            map;
1797         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1798         union e1000_adv_tx_desc *txd = NULL;
1799         struct mbuf             *m_head = *m_headp;
1800         struct ether_vlan_header *eh = NULL;
1801         struct ip               *ip = NULL;
1802         struct tcphdr           *th = NULL;
1803         u32                     hdrlen, cmd_type_len, olinfo_status = 0;
1804         int                     ehdrlen, poff;
1805         int                     nsegs, i, first, last = 0;
1806         int                     error, do_tso, remap = 1;
1807
1808         /* Set basic descriptor constants */
1809         cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1810         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1811         if (m_head->m_flags & M_VLANTAG)
1812                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1813
1814 retry:
1815         m_head = *m_headp;
1816         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1817         hdrlen = ehdrlen = poff = 0;
1818
1819         /*
1820          * Intel recommends entire IP/TCP header length reside in a single
1821          * buffer. If multiple descriptors are used to describe the IP and
1822          * TCP header, each descriptor should describe one or more
1823          * complete headers; descriptors referencing only parts of headers
1824          * are not supported. If all layer headers are not coalesced into
1825          * a single buffer, each buffer should not cross a 4KB boundary,
1826          * or be larger than the maximum read request size.
1827          * The controller also requires modifying the IP/TCP header to
1828          * make TSO work, so we first get a writable mbuf chain and then
1829          * coalesce the ethernet/IP/TCP headers into a single buffer to
1830          * meet the controller's requirement. This also simplifies
1831          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1832          */
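         /*
          * Worked example of the offsets computed below (illustrative):
          * for an untagged IPv4/TCP frame with no IP or TCP options,
          * ehdrlen = 14, ip_hl = 5 so poff = 14 + 20 = 34, and
          * th_off = 5 so hdrlen = 34 + 20 = 54 bytes of coalesced header.
          */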
1833         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1834                 if (do_tso || (m_head->m_next != NULL && 
1835                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1836                         if (M_WRITABLE(*m_headp) == 0) {
1837                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1838                                 m_freem(*m_headp);
1839                                 if (m_head == NULL) {
1840                                         *m_headp = NULL;
1841                                         return (ENOBUFS);
1842                                 }
1843                                 *m_headp = m_head;
1844                         }
1845                 }
1846                 /*
1847                  * Assume IPv4, we don't have TSO/checksum offload support
1848                  * for IPv6 yet.
1849                  */
1850                 ehdrlen = sizeof(struct ether_header);
1851                 m_head = m_pullup(m_head, ehdrlen);
1852                 if (m_head == NULL) {
1853                         *m_headp = NULL;
1854                         return (ENOBUFS);
1855                 }
1856                 eh = mtod(m_head, struct ether_vlan_header *);
1857                 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1858                         ehdrlen = sizeof(struct ether_vlan_header);
1859                         m_head = m_pullup(m_head, ehdrlen);
1860                         if (m_head == NULL) {
1861                                 *m_headp = NULL;
1862                                 return (ENOBUFS);
1863                         }
1864                 }
1865                 m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1866                 if (m_head == NULL) {
1867                         *m_headp = NULL;
1868                         return (ENOBUFS);
1869                 }
1870                 ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1871                 poff = ehdrlen + (ip->ip_hl << 2);
1872                 if (do_tso) {
1873                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1874                         if (m_head == NULL) {
1875                                 *m_headp = NULL;
1876                                 return (ENOBUFS);
1877                         }
1878                         /*
1879                          * The TCP pseudo checksum must not include the TCP
1880                          * payload length, so the driver recomputes it here to
1881                          * match what the hardware expects, in adherence to
1882                          * Microsoft's Large Send specification.
1883                          */
1884                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1885                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
1886                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1887                         /* Keep track of the full header length */
1888                         hdrlen = poff + (th->th_off << 2);
1889                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1890                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1891                         if (m_head == NULL) {
1892                                 *m_headp = NULL;
1893                                 return (ENOBUFS);
1894                         }
1895                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1896                         m_head = m_pullup(m_head, poff + (th->th_off << 2));
1897                         if (m_head == NULL) {
1898                                 *m_headp = NULL;
1899                                 return (ENOBUFS);
1900                         }
1901                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1902                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1903                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1904                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1905                         if (m_head == NULL) {
1906                                 *m_headp = NULL;
1907                                 return (ENOBUFS);
1908                         }
1909                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1910                 }
1911                 *m_headp = m_head;
1912         }
1913
1914         /*
1915          * Map the packet for DMA
1916          *
1917          * Capture the first descriptor index,
1918          * this descriptor will have the index
1919          * of the EOP which is the only one that
1920          * now gets a DONE bit writeback.
1921          */
1922         first = txr->next_avail_desc;
1923         tx_buffer = &txr->tx_buffers[first];
1924         tx_buffer_mapped = tx_buffer;
1925         map = tx_buffer->map;
1926
1927         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1928             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1929
1930         /*
1931          * There are two types of errors we can (try) to handle:
1932          * - EFBIG means the mbuf chain was too long and bus_dma ran
1933          *   out of segments.  Defragment the mbuf chain and try again.
1934          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1935          *   at this point in time.  Defer sending and try again later.
1936          * All other errors, in particular EINVAL, are fatal and prevent the
1937          * mbuf chain from ever going through.  Drop it and report error.
1938          */
1939         if (error == EFBIG && remap) {
1940                 struct mbuf *m;
1941
1942                 m = m_defrag(*m_headp, M_DONTWAIT);
1943                 if (m == NULL) {
1944                         adapter->mbuf_defrag_failed++;
1945                         m_freem(*m_headp);
1946                         *m_headp = NULL;
1947                         return (ENOBUFS);
1948                 }
1949                 *m_headp = m;
1950
1951                 /* Try it again, but only once */
1952                 remap = 0;
1953                 goto retry;
1954         } else if (error == ENOMEM) {
1955                 adapter->no_tx_dma_setup++;
1956                 return (error);
1957         } else if (error != 0) {
1958                 adapter->no_tx_dma_setup++;
1959                 m_freem(*m_headp);
1960                 *m_headp = NULL;
1961                 return (error);
1962         }
1963
1964         /*
1965         ** Make sure we don't overrun the ring;
1966         ** we need nsegs descriptors and one for
1967         ** the context descriptor used for the
1968         ** offloads.
1969         */
1970         if ((nsegs + 1) > (txr->tx_avail - 2)) {
1971                 txr->no_desc_avail++;
1972                 bus_dmamap_unload(txr->txtag, map);
1973                 return (ENOBUFS);
1974         }
1975         m_head = *m_headp;
1976
1977         /* Do hardware assists:
1978          * Set up the context descriptor, used
1979          * when any hardware offload is done.
1980          * This includes CSUM, VLAN, and TSO.
1981          * It will use the first descriptor.
1982          */
1983
1984         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1985                 if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1986                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1987                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1988                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1989                 } else
1990                         return (ENXIO);
1991         } else if (igb_tx_ctx_setup(txr, m_head))
1992                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1993
1994         /* Calculate payload length */
1995         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1996             << E1000_ADVTXD_PAYLEN_SHIFT);
1997
1998         /* 82575 needs the queue index added */
1999         if (adapter->hw.mac.type == e1000_82575)
2000                 olinfo_status |= txr->me << 4;
2001
2002         /* Set up our transmit descriptors */
2003         i = txr->next_avail_desc;
2004         for (int j = 0; j < nsegs; j++) {
2005                 bus_size_t seg_len;
2006                 bus_addr_t seg_addr;
2007
2008                 tx_buffer = &txr->tx_buffers[i];
2009                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2010                 seg_addr = segs[j].ds_addr;
2011                 seg_len  = segs[j].ds_len;
2012
2013                 txd->read.buffer_addr = htole64(seg_addr);
2014                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2015                 txd->read.olinfo_status = htole32(olinfo_status);
2016                 last = i;
2017                 if (++i == adapter->num_tx_desc)
2018                         i = 0;
2019                 tx_buffer->m_head = NULL;
2020                 tx_buffer->next_eop = -1;
2021         }
2022
2023         txr->next_avail_desc = i;
2024         txr->tx_avail -= nsegs;
2025         tx_buffer->m_head = m_head;
2026
2027         /*
2028         ** Here we swap the map so the last descriptor,
2029         ** which gets the completion interrupt, has the
2030         ** real map, and the first descriptor gets the
2031         ** unused map from this descriptor.
2032         */
2033         tx_buffer_mapped->map = tx_buffer->map;
2034         tx_buffer->map = map;
2035         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2036
2037         /*
2038          * Last Descriptor of Packet
2039          * needs End Of Packet (EOP)
2040          * and Report Status (RS)
2041          */
2042         txd->read.cmd_type_len |=
2043             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2044         /*
2045          * Keep track in the first buffer which
2046          * descriptor will be written back
2047          */
2048         tx_buffer = &txr->tx_buffers[first];
2049         tx_buffer->next_eop = last;
2050         /* Update the watchdog time early and often */
2051         txr->watchdog_time = ticks;
2052
2053         /*
2054          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2055          * that this frame is available to transmit.
2056          */
2057         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2058             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2059         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2060         ++txr->tx_packets;
2061
2062         return (0);
2063 }
2064 static void
2065 igb_set_promisc(struct adapter *adapter)
2066 {
2067         struct ifnet    *ifp = adapter->ifp;
2068         struct e1000_hw *hw = &adapter->hw;
2069         u32             reg;
2070
2071         if (adapter->vf_ifp) {
2072                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2073                 return;
2074         }
2075
2076         reg = E1000_READ_REG(hw, E1000_RCTL);
2077         if (ifp->if_flags & IFF_PROMISC) {
2078                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2079                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2080         } else if (ifp->if_flags & IFF_ALLMULTI) {
2081                 reg |= E1000_RCTL_MPE;
2082                 reg &= ~E1000_RCTL_UPE;
2083                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2084         }
2085 }
2086
2087 static void
2088 igb_disable_promisc(struct adapter *adapter)
2089 {
2090         struct e1000_hw *hw = &adapter->hw;
2091         u32             reg;
2092
2093         if (adapter->vf_ifp) {
2094                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2095                 return;
2096         }
2097         reg = E1000_READ_REG(hw, E1000_RCTL);
2098         reg &= ~E1000_RCTL_UPE;
2099         reg &= ~E1000_RCTL_MPE;
2100         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2101 }
2102
2103
2104 /*********************************************************************
2105  *  Multicast Update
2106  *
2107  *  This routine is called whenever the multicast address list is updated.
2108  *
2109  **********************************************************************/
2110
2111 static void
2112 igb_set_multi(struct adapter *adapter)
2113 {
2114         struct ifnet    *ifp = adapter->ifp;
2115         struct ifmultiaddr *ifma;
2116         u32 reg_rctl = 0;
2117         u8  *mta;
2118
2119         int mcnt = 0;
2120
2121         IOCTL_DEBUGOUT("igb_set_multi: begin");
2122
2123         mta = adapter->mta;
2124         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2125             MAX_NUM_MULTICAST_ADDRESSES);
2126
2127 #if __FreeBSD_version < 800000
2128         IF_ADDR_LOCK(ifp);
2129 #else
2130         if_maddr_rlock(ifp);
2131 #endif
2132         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2133                 if (ifma->ifma_addr->sa_family != AF_LINK)
2134                         continue;
2135
2136                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2137                         break;
2138
2139                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2140                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2141                 mcnt++;
2142         }
2143 #if __FreeBSD_version < 800000
2144         IF_ADDR_UNLOCK(ifp);
2145 #else
2146         if_maddr_runlock(ifp);
2147 #endif
2148
2149         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2150                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2151                 reg_rctl |= E1000_RCTL_MPE;
2152                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2153         } else
2154                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2155 }
2156
2157
2158 /*********************************************************************
2159  *  Timer routine:
2160  *      This routine checks for link status,
2161  *      updates statistics, and does the watchdog.
2162  *
2163  **********************************************************************/
2164
2165 static void
2166 igb_local_timer(void *arg)
2167 {
2168         struct adapter          *adapter = arg;
2169         device_t                dev = adapter->dev;
2170         struct ifnet            *ifp = adapter->ifp;
2171         struct tx_ring          *txr = adapter->tx_rings;
2172         struct igb_queue        *que = adapter->queues;
2173         int                     hung = 0, busy = 0;
2174
2175
2176         IGB_CORE_LOCK_ASSERT(adapter);
2177
2178         igb_update_link_status(adapter);
2179         igb_update_stats_counters(adapter);
2180
2181         /*
2182         ** Check the status of the TX queues:
2183         **      - central locked handling of OACTIVE
2184         **      - watchdog only if all queues show hung
2185         */
2186         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2187                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2188                     (adapter->pause_frames == 0))
2189                         ++hung;
2190                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2191                         ++busy;
2192                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2193                         taskqueue_enqueue(que->tq, &que->que_task);
2194         }
2195         if (hung == adapter->num_queues)
2196                 goto timeout;
2197         if (busy == adapter->num_queues)
2198                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2199         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2200             (busy < adapter->num_queues))
2201                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2202
2203         adapter->pause_frames = 0;
2204         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2205 #ifndef DEVICE_POLLING
2206         /* Schedule all queue interrupts - deadlock protection */
2207         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2208 #endif
2209         return;
2210
2211 timeout:
2212         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2213         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2214             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2215             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2216         device_printf(dev, "TX(%d) desc avail = %d, "
2217             "Next TX to Clean = %d\n",
2218             txr->me, txr->tx_avail, txr->next_to_clean);
2219         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2220         adapter->watchdog_events++;
2221         igb_init_locked(adapter);
2222 }
2223
2224 static void
2225 igb_update_link_status(struct adapter *adapter)
2226 {
2227         struct e1000_hw *hw = &adapter->hw;
2228         struct ifnet *ifp = adapter->ifp;
2229         device_t dev = adapter->dev;
2230         struct tx_ring *txr = adapter->tx_rings;
2231         u32 link_check, thstat, ctrl;
2232
2233         link_check = thstat = ctrl = 0;
2234
2235         /* Get the cached link value or read for real */
2236         switch (hw->phy.media_type) {
2237         case e1000_media_type_copper:
2238                 if (hw->mac.get_link_status) {
2239                         /* Do the work to read phy */
2240                         e1000_check_for_link(hw);
2241                         link_check = !hw->mac.get_link_status;
2242                 } else
2243                         link_check = TRUE;
2244                 break;
2245         case e1000_media_type_fiber:
2246                 e1000_check_for_link(hw);
2247                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2248                                  E1000_STATUS_LU);
2249                 break;
2250         case e1000_media_type_internal_serdes:
2251                 e1000_check_for_link(hw);
2252                 link_check = adapter->hw.mac.serdes_has_link;
2253                 break;
2254         /* VF device is type_unknown */
2255         case e1000_media_type_unknown:
2256                 e1000_check_for_link(hw);
2257                 link_check = !hw->mac.get_link_status;
2258                 /* Fall thru */
2259         default:
2260                 break;
2261         }
2262
2263         /* Check for thermal downshift or shutdown */
2264         if (hw->mac.type == e1000_i350) {
2265                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2266                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2267         }
2268
2269         /* Now we check if a transition has happened */
2270         if (link_check && (adapter->link_active == 0)) {
2271                 e1000_get_speed_and_duplex(&adapter->hw, 
2272                     &adapter->link_speed, &adapter->link_duplex);
2273                 if (bootverbose)
2274                         device_printf(dev, "Link is up %d Mbps %s\n",
2275                             adapter->link_speed,
2276                             ((adapter->link_duplex == FULL_DUPLEX) ?
2277                             "Full Duplex" : "Half Duplex"));
2278                 adapter->link_active = 1;
2279                 ifp->if_baudrate = adapter->link_speed * 1000000;
2280                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2281                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2282                         device_printf(dev, "Link: thermal downshift\n");
2283                 /* This can sleep */
2284                 if_link_state_change(ifp, LINK_STATE_UP);
2285         } else if (!link_check && (adapter->link_active == 1)) {
2286                 ifp->if_baudrate = adapter->link_speed = 0;
2287                 adapter->link_duplex = 0;
2288                 if (bootverbose)
2289                         device_printf(dev, "Link is Down\n");
2290                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2291                     (thstat & E1000_THSTAT_PWR_DOWN))
2292                         device_printf(dev, "Link: thermal shutdown\n");
2293                 adapter->link_active = 0;
2294                 /* This can sleep */
2295                 if_link_state_change(ifp, LINK_STATE_DOWN);
2296                 /* Reset queue state */
2297                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2298                         txr->queue_status = IGB_QUEUE_IDLE;
2299         }
2300 }
2301
2302 /*********************************************************************
2303  *
2304  *  This routine disables all traffic on the adapter by issuing a
2305  *  global reset on the MAC and deallocates TX/RX buffers.
2306  *
2307  **********************************************************************/
2308
2309 static void
2310 igb_stop(void *arg)
2311 {
2312         struct adapter  *adapter = arg;
2313         struct ifnet    *ifp = adapter->ifp;
2314         struct tx_ring *txr = adapter->tx_rings;
2315
2316         IGB_CORE_LOCK_ASSERT(adapter);
2317
2318         INIT_DEBUGOUT("igb_stop: begin");
2319
2320         igb_disable_intr(adapter);
2321
2322         callout_stop(&adapter->timer);
2323
2324         /* Tell the stack that the interface is no longer active */
2325         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2326         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2327
2328         /* Disarm watchdog timer. */
2329         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2330                 IGB_TX_LOCK(txr);
2331                 txr->queue_status = IGB_QUEUE_IDLE;
2332                 IGB_TX_UNLOCK(txr);
2333         }
2334
2335         e1000_reset_hw(&adapter->hw);
2336         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2337
2338         e1000_led_off(&adapter->hw);
2339         e1000_cleanup_led(&adapter->hw);
2340 }
2341
2342
2343 /*********************************************************************
2344  *
2345  *  Determine hardware revision.
2346  *
2347  **********************************************************************/
2348 static void
2349 igb_identify_hardware(struct adapter *adapter)
2350 {
2351         device_t dev = adapter->dev;
2352
2353         /* Make sure our PCI config space has the necessary stuff set */
2354         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2355         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2356             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2357                 INIT_DEBUGOUT("Memory Access and/or Bus Master "
2358                     "bits were not set!\n");
2359                 adapter->hw.bus.pci_cmd_word |=
2360                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2361                 pci_write_config(dev, PCIR_COMMAND,
2362                     adapter->hw.bus.pci_cmd_word, 2);
2363         }
2364
2365         /* Save off the information about this board */
2366         adapter->hw.vendor_id = pci_get_vendor(dev);
2367         adapter->hw.device_id = pci_get_device(dev);
2368         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2369         adapter->hw.subsystem_vendor_id =
2370             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2371         adapter->hw.subsystem_device_id =
2372             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2373
2374         /* Set MAC type early for PCI setup */
2375         e1000_set_mac_type(&adapter->hw);
2376
2377         /* Are we a VF device? */
2378         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2379             (adapter->hw.mac.type == e1000_vfadapt_i350))
2380                 adapter->vf_ifp = 1;
2381         else
2382                 adapter->vf_ifp = 0;
2383 }
2384
2385 static int
2386 igb_allocate_pci_resources(struct adapter *adapter)
2387 {
2388         device_t        dev = adapter->dev;
2389         int             rid;
2390
2391         rid = PCIR_BAR(0);
2392         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2393             &rid, RF_ACTIVE);
2394         if (adapter->pci_mem == NULL) {
2395                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2396                 return (ENXIO);
2397         }
2398         adapter->osdep.mem_bus_space_tag =
2399             rman_get_bustag(adapter->pci_mem);
2400         adapter->osdep.mem_bus_space_handle =
2401             rman_get_bushandle(adapter->pci_mem);
2402         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2403
2404         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2405
2406         /* This will setup either MSI/X or MSI */
2407         adapter->msix = igb_setup_msix(adapter);
2408         adapter->hw.back = &adapter->osdep;
2409
2410         return (0);
2411 }
2412
2413 /*********************************************************************
2414  *
2415  *  Setup the Legacy or MSI Interrupt handler
2416  *
2417  **********************************************************************/
2418 static int
2419 igb_allocate_legacy(struct adapter *adapter)
2420 {
2421         device_t                dev = adapter->dev;
2422         struct igb_queue        *que = adapter->queues;
2423         struct tx_ring          *txr = adapter->tx_rings;
2424         int                     error, rid = 0;
2425
2426         /* Turn off all interrupts */
2427         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2428
2429         /* MSI RID is 1 */
2430         if (adapter->msix == 1)
2431                 rid = 1;
2432
2433         /* We allocate a single interrupt resource */
2434         adapter->res = bus_alloc_resource_any(dev,
2435             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2436         if (adapter->res == NULL) {
2437                 device_printf(dev, "Unable to allocate bus resource: "
2438                     "interrupt\n");
2439                 return (ENXIO);
2440         }
2441
2442 #if __FreeBSD_version >= 800000
2443         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2444 #endif
2445
2446         /*
2447          * Try allocating a fast interrupt and the associated deferred
2448          * processing contexts.
2449          */
2450         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2451         /* Make tasklet for deferred link handling */
2452         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2453         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2454             taskqueue_thread_enqueue, &que->tq);
2455         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2456             device_get_nameunit(adapter->dev));
2457         if ((error = bus_setup_intr(dev, adapter->res,
2458             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2459             adapter, &adapter->tag)) != 0) {
2460                 device_printf(dev, "Failed to register fast interrupt "
2461                             "handler: %d\n", error);
2462                 taskqueue_free(que->tq);
2463                 que->tq = NULL;
2464                 return (error);
2465         }
2466
2467         return (0);
2468 }
2469
2470
2471 /*********************************************************************
2472  *
2473  *  Setup the MSIX Queue Interrupt handlers: 
2474  *
2475  **********************************************************************/
2476 static int
2477 igb_allocate_msix(struct adapter *adapter)
2478 {
2479         device_t                dev = adapter->dev;
2480         struct igb_queue        *que = adapter->queues;
2481         int                     error, rid, vector = 0;
2482
2483         /* Be sure to start with all interrupts disabled */
2484         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2485         E1000_WRITE_FLUSH(&adapter->hw);
2486
2487         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2488                 rid = vector + 1;
2489                 que->res = bus_alloc_resource_any(dev,
2490                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2491                 if (que->res == NULL) {
2492                         device_printf(dev,
2493                             "Unable to allocate bus resource: "
2494                             "MSIX Queue Interrupt\n");
2495                         return (ENXIO);
2496                 }
2497                 error = bus_setup_intr(dev, que->res,
2498                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2499                     igb_msix_que, que, &que->tag);
2500                 if (error) {
2501                         que->res = NULL;
2502                         device_printf(dev, "Failed to register Queue handler");
2503                         return (error);
2504                 }
2505 #if __FreeBSD_version >= 800504
2506                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2507 #endif
2508                 que->msix = vector;
2509                 if (adapter->hw.mac.type == e1000_82575)
2510                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2511                 else
2512                         que->eims = 1 << vector;
2513                 /*
2514                 ** Bind the msix vector, and thus the
2515                 ** rings to the corresponding cpu.
2516                 */
2517                 if (adapter->num_queues > 1) {
2518                         if (igb_last_bind_cpu < 0)
2519                                 igb_last_bind_cpu = CPU_FIRST();
2520                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2521                         device_printf(dev,
2522                                 "Bound queue %d to cpu %d\n",
2523                                 i, igb_last_bind_cpu);
2524                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2525                         igb_last_bind_cpu = igb_last_bind_cpu % mp_ncpus;
2526                 }
2527 #if __FreeBSD_version >= 800000
2528                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2529                     que->txr);
2530 #endif
2531                 /* Make tasklet for deferred handling */
2532                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2533                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2534                     taskqueue_thread_enqueue, &que->tq);
2535                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2536                     device_get_nameunit(adapter->dev));
2537         }
2538
2539         /* And Link */
2540         rid = vector + 1;
2541         adapter->res = bus_alloc_resource_any(dev,
2542             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2543         if (adapter->res == NULL) {
2544                 device_printf(dev,
2545                     "Unable to allocate bus resource: "
2546                     "MSIX Link Interrupt\n");
2547                 return (ENXIO);
2548         }
2549         if ((error = bus_setup_intr(dev, adapter->res,
2550             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2551             igb_msix_link, adapter, &adapter->tag)) != 0) {
2552                 device_printf(dev, "Failed to register Link handler");
2553                 return (error);
2554         }
2555 #if __FreeBSD_version >= 800504
2556         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2557 #endif
2558         adapter->linkvec = vector;
2559
2560         return (0);
2561 }
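     /*
      * Binding sketch (illustrative): with num_queues = 4 on a 2-CPU
      * system whose CPU ids start at 0, the loop above walks
      * igb_last_bind_cpu from CPU_FIRST() through CPU_NEXT() modulo
      * mp_ncpus, so the queue vectors are bound to CPUs 0, 1, 0, 1
      * in turn.
      */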
2562
2563
2564 static void
2565 igb_configure_queues(struct adapter *adapter)
2566 {
2567         struct  e1000_hw        *hw = &adapter->hw;
2568         struct  igb_queue       *que;
2569         u32                     tmp, ivar = 0, newitr = 0;
2570
2571         /* First turn on RSS capability */
2572         if (adapter->hw.mac.type != e1000_82575)
2573                 E1000_WRITE_REG(hw, E1000_GPIE,
2574                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2575                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2576
2577         /* Turn on MSIX */
2578         switch (adapter->hw.mac.type) {
2579         case e1000_82580:
2580         case e1000_i350:
2581         case e1000_i210:
2582         case e1000_i211:
2583         case e1000_vfadapt:
2584         case e1000_vfadapt_i350:
2585                 /* RX entries */
2586                 for (int i = 0; i < adapter->num_queues; i++) {
2587                         u32 index = i >> 1;
2588                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2589                         que = &adapter->queues[i];
2590                         if (i & 1) {
2591                                 ivar &= 0xFF00FFFF;
2592                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2593                         } else {
2594                                 ivar &= 0xFFFFFF00;
2595                                 ivar |= que->msix | E1000_IVAR_VALID;
2596                         }
2597                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2598                 }
2599                 /* TX entries */
2600                 for (int i = 0; i < adapter->num_queues; i++) {
2601                         u32 index = i >> 1;
2602                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2603                         que = &adapter->queues[i];
2604                         if (i & 1) {
2605                                 ivar &= 0x00FFFFFF;
2606                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2607                         } else {
2608                                 ivar &= 0xFFFF00FF;
2609                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2610                         }
2611                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2612                         adapter->que_mask |= que->eims;
2613                 }
2614
2615                 /* And for the link interrupt */
2616                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2617                 adapter->link_mask = 1 << adapter->linkvec;
2618                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2619                 break;
2620         case e1000_82576:
2621                 /* RX entries */
2622                 for (int i = 0; i < adapter->num_queues; i++) {
2623                         u32 index = i & 0x7; /* Each IVAR has two entries */
2624                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2625                         que = &adapter->queues[i];
2626                         if (i < 8) {
2627                                 ivar &= 0xFFFFFF00;
2628                                 ivar |= que->msix | E1000_IVAR_VALID;
2629                         } else {
2630                                 ivar &= 0xFF00FFFF;
2631                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2632                         }
2633                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2634                         adapter->que_mask |= que->eims;
2635                 }
2636                 /* TX entries */
2637                 for (int i = 0; i < adapter->num_queues; i++) {
2638                         u32 index = i & 0x7; /* Each IVAR has two entries */
2639                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2640                         que = &adapter->queues[i];
2641                         if (i < 8) {
2642                                 ivar &= 0xFFFF00FF;
2643                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2644                         } else {
2645                                 ivar &= 0x00FFFFFF;
2646                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2647                         }
2648                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2649                         adapter->que_mask |= que->eims;
2650                 }
2651
2652                 /* And for the link interrupt */
2653                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2654                 adapter->link_mask = 1 << adapter->linkvec;
2655                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2656                 break;
2657
2658         case e1000_82575:
2659                 /* Enable MSI-X support */
2660                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2661                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2662                 /* Auto-Mask interrupts upon ICR read. */
2663                 tmp |= E1000_CTRL_EXT_EIAME;
2664                 tmp |= E1000_CTRL_EXT_IRCA;
2665                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2666
2667                 /* Queues */
2668                 for (int i = 0; i < adapter->num_queues; i++) {
2669                         que = &adapter->queues[i];
2670                         tmp = E1000_EICR_RX_QUEUE0 << i;
2671                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2672                         que->eims = tmp;
2673                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2674                             i, que->eims);
2675                         adapter->que_mask |= que->eims;
2676                 }
2677
2678                 /* Link */
2679                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2680                     E1000_EIMS_OTHER);
2681                 adapter->link_mask |= E1000_EIMS_OTHER;
2682         default:
2683                 break;
2684         }
2685
2686         /* Set the starting interrupt rate */
2687         if (igb_max_interrupt_rate > 0)
2688                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2689
2690         if (hw->mac.type == e1000_82575)
2691                 newitr |= newitr << 16;
2692         else
2693                 newitr |= E1000_EITR_CNT_IGNR;
2694
2695         for (int i = 0; i < adapter->num_queues; i++) {
2696                 que = &adapter->queues[i];
2697                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2698         }
2699
2700         return;
2701 }
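     /*
      * IVAR layout sketch, derived from the code above. On the
      * 82580/i350 family each 32-bit IVAR register holds four 8-bit
      * entries (vector number OR'd with E1000_IVAR_VALID) and is
      * indexed by queue >> 1:
      *
      *      bits  7:0   RX (even queue)     bits 23:16  RX (odd queue)
      *      bits 15:8   TX (even queue)     bits 31:24  TX (odd queue)
      *
      * The 82576 instead indexes IVAR by (queue & 0x7) and places
      * queues 0-7 in the low half and queues 8-15 in the high half
      * of each register.
      */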
2702
2703
2704 static void
2705 igb_free_pci_resources(struct adapter *adapter)
2706 {
2707         struct          igb_queue *que = adapter->queues;
2708         device_t        dev = adapter->dev;
2709         int             rid;
2710
2711         /*
2712         ** There is a slight possibility of a failure mode
2713         ** in attach that will result in entering this function
2714         ** before interrupt resources have been initialized, and
2715         ** in that case we do not want to execute the loops below
2716         ** We can detect this reliably by the state of the adapter
2717         ** in that case we do not want to execute the loops below.
2718         ** We can detect this reliably by the state of the adapter's
2719         ** res pointer.
2720                 goto mem;
2721
2722         /*
2723          * First release all the interrupt resources:
2724          */
2725         for (int i = 0; i < adapter->num_queues; i++, que++) {
2726                 rid = que->msix + 1;
2727                 if (que->tag != NULL) {
2728                         bus_teardown_intr(dev, que->res, que->tag);
2729                         que->tag = NULL;
2730                 }
2731                 if (que->res != NULL)
2732                         bus_release_resource(dev,
2733                             SYS_RES_IRQ, rid, que->res);
2734         }
2735
2736         /* Clean the Legacy or Link interrupt last */
2737         if (adapter->linkvec) /* we are doing MSIX */
2738                 rid = adapter->linkvec + 1;
2739         else
2740                 rid = (adapter->msix != 0) ? 1 : 0;
2741
2742         que = adapter->queues;
2743         if (adapter->tag != NULL) {
2744                 taskqueue_drain(que->tq, &adapter->link_task);
2745                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2746                 adapter->tag = NULL;
2747         }
2748         if (adapter->res != NULL)
2749                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2750
2751         for (int i = 0; i < adapter->num_queues; i++, que++) {
2752                 if (que->tq != NULL) {
2753 #if __FreeBSD_version >= 800000
2754                         taskqueue_drain(que->tq, &que->txr->txq_task);
2755 #endif
2756                         taskqueue_drain(que->tq, &que->que_task);
2757                         taskqueue_free(que->tq);
2758                 }
2759         }
2760 mem:
2761         if (adapter->msix)
2762                 pci_release_msi(dev);
2763
2764         if (adapter->msix_mem != NULL)
2765                 bus_release_resource(dev, SYS_RES_MEMORY,
2766                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2767
2768         if (adapter->pci_mem != NULL)
2769                 bus_release_resource(dev, SYS_RES_MEMORY,
2770                     PCIR_BAR(0), adapter->pci_mem);
2771
2772 }
2773
2774 /*
 * Set up either MSI/X or MSI
2776  */
2777 static int
2778 igb_setup_msix(struct adapter *adapter)
2779 {
2780         device_t dev = adapter->dev;
2781         int rid, want, queues, msgs, maxqueues;
2782
2783         /* tuneable override */
2784         if (igb_enable_msix == 0)
2785                 goto msi;
2786
2787         /* First try MSI/X */
2788         rid = PCIR_BAR(IGB_MSIX_BAR);
2789         adapter->msix_mem = bus_alloc_resource_any(dev,
2790             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2791         if (!adapter->msix_mem) {
2792                 /* May not be enabled */
2793                 device_printf(adapter->dev,
                    "Unable to map MSIX table\n");
2795                 goto msi;
2796         }
2797
2798         msgs = pci_msix_count(dev); 
2799         if (msgs == 0) { /* system has msix disabled */
2800                 bus_release_resource(dev, SYS_RES_MEMORY,
2801                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2802                 adapter->msix_mem = NULL;
2803                 goto msi;
2804         }
2805
2806         /* Figure out a reasonable auto config value */
2807         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2808
2809         /* Manual override */
2810         if (igb_num_queues != 0)
2811                 queues = igb_num_queues;
2812
2813         /* Sanity check based on HW */
2814         switch (adapter->hw.mac.type) {
2815                 case e1000_82575:
2816                         maxqueues = 4;
2817                         break;
2818                 case e1000_82576:
2819                 case e1000_82580:
2820                 case e1000_i350:
2821                         maxqueues = 8;
2822                         break;
2823                 case e1000_i210:
2824                         maxqueues = 4;
2825                         break;
2826                 case e1000_i211:
2827                         maxqueues = 2;
2828                         break;
2829                 default:  /* VF interfaces */
2830                         maxqueues = 1;
2831                         break;
2832         }
2833         if (queues > maxqueues)
2834                 queues = maxqueues;
2835
2836         /*
2837         ** One vector (RX/TX pair) per queue
2838         ** plus an additional for Link interrupt
2839         */
2840         want = queues + 1;
2841         if (msgs >= want)
2842                 msgs = want;
2843         else {
2844                 device_printf(adapter->dev,
2845                     "MSIX Configuration Problem, "
2846                     "%d vectors configured, but %d queues wanted!\n",
2847                     msgs, want);
2848                 return (0);
2849         }
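        /*
        ** Worked example (illustrative): on an 8-core box whose
        ** device exposes 10 MSIX messages, queues = min(8, 9) = 8,
        ** within the hardware limit above, so want = 9 vectors
        ** (one per queue pair plus the link vector).  With fewer
        ** messages than that we give up on MSIX here and the
        ** caller falls back to a non-MSIX interrupt.
        */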
2850         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2851                 device_printf(adapter->dev,
2852                     "Using MSIX interrupts with %d vectors\n", msgs);
2853                 adapter->num_queues = queues;
2854                 return (msgs);
2855         }
2856 msi:
2857         msgs = pci_msi_count(dev);
2858         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
                device_printf(adapter->dev, "Using MSI interrupt\n");
2860                 return (msgs);
2861         }
2862         return (0);
2863 }
2864
2865 /*********************************************************************
2866  *
 *  Set up a fresh starting state
2868  *
2869  **********************************************************************/
2870 static void
2871 igb_reset(struct adapter *adapter)
2872 {
2873         device_t        dev = adapter->dev;
2874         struct e1000_hw *hw = &adapter->hw;
2875         struct e1000_fc_info *fc = &hw->fc;
2876         struct ifnet    *ifp = adapter->ifp;
2877         u32             pba = 0;
2878         u16             hwm;
2879
2880         INIT_DEBUGOUT("igb_reset: begin");
2881
2882         /* Let the firmware know the OS is in control */
2883         igb_get_hw_control(adapter);
2884
2885         /*
2886          * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
2889          */
2890         switch (hw->mac.type) {
2891         case e1000_82575:
2892                 pba = E1000_PBA_32K;
2893                 break;
2894         case e1000_82576:
2895         case e1000_vfadapt:
2896                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2897                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2898                 break;
2899         case e1000_82580:
2900         case e1000_i350:
2901         case e1000_vfadapt_i350:
2902                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2903                 pba = e1000_rxpbs_adjust_82580(pba);
2904                 break;
2905         case e1000_i210:
2906         case e1000_i211:
                pba = E1000_PBA_34K;
                break;
2908         default:
2909                 break;
2910         }
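        /*
        ** Note: pba is in KB units here (see the "pba=%dK" debug
        ** print below); it sets the receive share of the on-chip
        ** packet buffer, e.g. E1000_PBA_32K reserves 32KB for RX
        ** and leaves the remainder for TX.
        */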
2911
2912         /* Special needs in case of Jumbo frames */
2913         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2914                 u32 tx_space, min_tx, min_rx;
2915                 pba = E1000_READ_REG(hw, E1000_PBA);
2916                 tx_space = pba >> 16;
2917                 pba &= 0xffff;
2918                 min_tx = (adapter->max_frame_size +
2919                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2920                 min_tx = roundup2(min_tx, 1024);
2921                 min_tx >>= 10;
2922                 min_rx = adapter->max_frame_size;
2923                 min_rx = roundup2(min_rx, 1024);
2924                 min_rx >>= 10;
2925                 if (tx_space < min_tx &&
2926                     ((min_tx - tx_space) < pba)) {
2927                         pba = pba - (min_tx - tx_space);
2928                         /*
2929                          * if short on rx space, rx wins
2930                          * and must trump tx adjustment
2931                          */
2932                         if (pba < min_rx)
2933                                 pba = min_rx;
2934                 }
2935                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2936         }
2937
        INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2939
2940         /*
2941          * These parameters control the automatic generation (Tx) and
2942          * response (Rx) to Ethernet PAUSE frames.
2943          * - High water mark should allow for at least two frames to be
2944          *   received after sending an XOFF.
2945          * - Low water mark works best when it is very near the high water mark.
2946          *   This allows the receiver to restart by sending XON when it has
2947          *   drained a bit.
2948          */
2949         hwm = min(((pba << 10) * 9 / 10),
2950             ((pba << 10) - 2 * adapter->max_frame_size));
2951
2952         if (hw->mac.type < e1000_82576) {
2953                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2954                 fc->low_water = fc->high_water - 8;
2955         } else {
2956                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2957                 fc->low_water = fc->high_water - 16;
2958         }
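        /*
        ** Worked example (illustrative): with pba = 34 (KB) and a
        ** 1518-byte max frame, pba << 10 = 34816 bytes, so
        ** hwm = min(34816 * 9 / 10, 34816 - 2 * 1518)
        **     = min(31334, 31780) = 31334.
        ** With 16-byte granularity that yields high_water = 31328
        ** and low_water = 31312.
        */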
2959
2960         fc->pause_time = IGB_FC_PAUSE_TIME;
2961         fc->send_xon = TRUE;
2962         if (adapter->fc)
2963                 fc->requested_mode = adapter->fc;
2964         else
2965                 fc->requested_mode = e1000_fc_default;
2966
2967         /* Issue a global reset */
2968         e1000_reset_hw(hw);
2969         E1000_WRITE_REG(hw, E1000_WUC, 0);
2970
2971         if (e1000_init_hw(hw) < 0)
2972                 device_printf(dev, "Hardware Initialization Failed\n");
2973
2974         /* Setup DMA Coalescing */
2975         if ((hw->mac.type > e1000_82580) &&
2976             (hw->mac.type != e1000_i211)) {
2977                 u32 dmac;
2978                 u32 reg = ~E1000_DMACR_DMAC_EN;
2979
2980                 if (adapter->dmac == 0) { /* Disabling it */
2981                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2982                         goto reset_out;
2983                 }
2984
2985                 /* Set starting thresholds */
2986                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2987                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2988
2989                 hwm = 64 * pba - adapter->max_frame_size / 16;
2990                 if (hwm < 64 * (pba - 6))
2991                         hwm = 64 * (pba - 6);
2992                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2993                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2994                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2995                     & E1000_FCRTC_RTH_COAL_MASK);
2996                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2997
2998
2999                 dmac = pba - adapter->max_frame_size / 512;
3000                 if (dmac < pba - 10)
3001                         dmac = pba - 10;
3002                 reg = E1000_READ_REG(hw, E1000_DMACR);
3003                 reg &= ~E1000_DMACR_DMACTHR_MASK;
                reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3005                     & E1000_DMACR_DMACTHR_MASK);
                /* transition to L0s or L1 if available.. */
3007                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3008                 /* timer = value in adapter->dmac in 32usec intervals */
3009                 reg |= (adapter->dmac >> 5);
3010                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
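                /*
                ** Example (illustrative): per the comment above, the
                ** adapter->dmac value is in microseconds, so a value
                ** of 1000 (1ms) programs 1000 >> 5 = 31 intervals of
                ** 32us each, roughly the requested millisecond.
                */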
3011
3012                 /* Set the interval before transition */
3013                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3014                 reg |= 0x80000004;
3015                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3016
3017                 /* free space in tx packet buffer to wake from DMA coal */
3018                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
3019                     (20480 - (2 * adapter->max_frame_size)) >> 6);
3020
3021                 /* make low power state decision controlled by DMA coal */
3022                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3023                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3024                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3025                 device_printf(dev, "DMA Coalescing enabled\n");
3026
3027         } else if (hw->mac.type == e1000_82580) {
3028                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3029                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3030                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3031                     reg & ~E1000_PCIEMISC_LX_DECISION);
3032         }
3033
3034 reset_out:
3035         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3036         e1000_get_phy_info(hw);
3037         e1000_check_for_link(hw);
3038         return;
3039 }
3040
3041 /*********************************************************************
3042  *
3043  *  Setup networking device structure and register an interface.
3044  *
3045  **********************************************************************/
3046 static int
3047 igb_setup_interface(device_t dev, struct adapter *adapter)
3048 {
3049         struct ifnet   *ifp;
3050
3051         INIT_DEBUGOUT("igb_setup_interface: begin");
3052
3053         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3054         if (ifp == NULL) {
                device_printf(dev, "cannot allocate ifnet structure\n");
3056                 return (-1);
3057         }
3058         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3059         ifp->if_init =  igb_init;
3060         ifp->if_softc = adapter;
3061         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3062         ifp->if_ioctl = igb_ioctl;
3063 #if __FreeBSD_version >= 800000
3064         ifp->if_transmit = igb_mq_start;
3065         ifp->if_qflush = igb_qflush;
3066 #else
3067         ifp->if_start = igb_start;
3068         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3069         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3070         IFQ_SET_READY(&ifp->if_snd);
3071 #endif
3072
3073         ether_ifattach(ifp, adapter->hw.mac.addr);
3074
3075         ifp->if_capabilities = ifp->if_capenable = 0;
3076
3077         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3078         ifp->if_capabilities |= IFCAP_TSO4;
3079         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3080         ifp->if_capenable = ifp->if_capabilities;
3081
        /* Advertise LRO capability, but leave it disabled by default */
3083         ifp->if_capabilities |= IFCAP_LRO;
3084
3085 #ifdef DEVICE_POLLING
3086         ifp->if_capabilities |= IFCAP_POLLING;
3087 #endif
3088
3089         /*
3090          * Tell the upper layer(s) we
3091          * support full VLAN capability.
3092          */
3093         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3094         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3095                              |  IFCAP_VLAN_HWTSO
3096                              |  IFCAP_VLAN_MTU;
3097         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3098                           |  IFCAP_VLAN_HWTSO
3099                           |  IFCAP_VLAN_MTU;
3100
3101         /*
        ** Don't turn this on by default: if vlans are
        ** created on another pseudo device (e.g. lagg)
        ** then vlan events are not passed through, breaking
        ** operation, but with HW FILTER off it works. If
3106         ** using vlans directly on the igb driver you can
3107         ** enable this and get full hardware tag filtering.
3108         */
3109         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
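        /*
        ** Usage note (not driver code): with the capability
        ** advertised, hardware VLAN filtering can be toggled at
        ** runtime, e.g. "ifconfig igb0 vlanhwfilter" to enable it
        ** and "ifconfig igb0 -vlanhwfilter" to turn it back off.
        */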
3110
3111         /*
3112          * Specify the media types supported by this adapter and register
3113          * callbacks to update media and link information
3114          */
3115         ifmedia_init(&adapter->media, IFM_IMASK,
3116             igb_media_change, igb_media_status);
3117         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3118             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3119                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3120                             0, NULL);
3121                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3122         } else {
3123                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3124                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3125                             0, NULL);
3126                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3127                             0, NULL);
3128                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3129                             0, NULL);
3130                 if (adapter->hw.phy.type != e1000_phy_ife) {
3131                         ifmedia_add(&adapter->media,
3132                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3133                         ifmedia_add(&adapter->media,
3134                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3135                 }
3136         }
3137         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3138         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3139         return (0);
3140 }
3141
3142
3143 /*
3144  * Manage DMA'able memory.
3145  */
3146 static void
3147 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3148 {
3149         if (error)
3150                 return;
3151         *(bus_addr_t *) arg = segs[0].ds_addr;
3152 }
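/*
 * Note that igb_dma_malloc() below creates its DMA tag with
 * nsegments = 1, so this callback only ever sees a single
 * segment and segs[0].ds_addr is the complete physical address.
 */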
3153
3154 static int
3155 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3156         struct igb_dma_alloc *dma, int mapflags)
3157 {
3158         int error;
3159
3160         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3161                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3162                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3163                                 BUS_SPACE_MAXADDR,      /* highaddr */
3164                                 NULL, NULL,             /* filter, filterarg */
3165                                 size,                   /* maxsize */
3166                                 1,                      /* nsegments */
3167                                 size,                   /* maxsegsize */
3168                                 0,                      /* flags */
3169                                 NULL,                   /* lockfunc */
3170                                 NULL,                   /* lockarg */
3171                                 &dma->dma_tag);
3172         if (error) {
3173                 device_printf(adapter->dev,
3174                     "%s: bus_dma_tag_create failed: %d\n",
3175                     __func__, error);
3176                 goto fail_0;
3177         }
3178
3179         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3180             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3181         if (error) {
3182                 device_printf(adapter->dev,
3183                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3184                     __func__, (uintmax_t)size, error);
3185                 goto fail_2;
3186         }
3187
3188         dma->dma_paddr = 0;
3189         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3190             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3191         if (error || dma->dma_paddr == 0) {
3192                 device_printf(adapter->dev,
3193                     "%s: bus_dmamap_load failed: %d\n",
3194                     __func__, error);
3195                 goto fail_3;
3196         }
3197
3198         return (0);
3199
3200 fail_3:
3201         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3202 fail_2:
3203         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3204         bus_dma_tag_destroy(dma->dma_tag);
3205 fail_0:
3206         dma->dma_map = NULL;
3207         dma->dma_tag = NULL;
3208
3209         return (error);
3210 }
3211
3212 static void
3213 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3214 {
3215         if (dma->dma_tag == NULL)
3216                 return;
3217         if (dma->dma_map != NULL) {
3218                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3219                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3220                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3221                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3222                 dma->dma_map = NULL;
3223         }
3224         bus_dma_tag_destroy(dma->dma_tag);
3225         dma->dma_tag = NULL;
3226 }
3227
3228
3229 /*********************************************************************
3230  *
3231  *  Allocate memory for the transmit and receive rings, and then
3232  *  the descriptors associated with each, called only once at attach.
3233  *
3234  **********************************************************************/
3235 static int
3236 igb_allocate_queues(struct adapter *adapter)
3237 {
3238         device_t dev = adapter->dev;
3239         struct igb_queue        *que = NULL;
3240         struct tx_ring          *txr = NULL;
3241         struct rx_ring          *rxr = NULL;
3242         int rsize, tsize, error = E1000_SUCCESS;
3243         int txconf = 0, rxconf = 0;
3244
3245         /* First allocate the top level queue structs */
3246         if (!(adapter->queues =
3247             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3248             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3249                 device_printf(dev, "Unable to allocate queue memory\n");
3250                 error = ENOMEM;
3251                 goto fail;
3252         }
3253
3254         /* Next allocate the TX ring struct memory */
3255         if (!(adapter->tx_rings =
3256             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3257             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3258                 device_printf(dev, "Unable to allocate TX ring memory\n");
3259                 error = ENOMEM;
3260                 goto tx_fail;
3261         }
3262
        /* Now allocate the RX ring struct memory */
3264         if (!(adapter->rx_rings =
3265             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3266             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3267                 device_printf(dev, "Unable to allocate RX ring memory\n");
3268                 error = ENOMEM;
3269                 goto rx_fail;
3270         }
3271
3272         tsize = roundup2(adapter->num_tx_desc *
3273             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3274         /*
3275          * Now set up the TX queues, txconf is needed to handle the
3276          * possibility that things fail midcourse and we need to
3277          * undo memory gracefully
3278          */ 
3279         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3280                 /* Set up some basics */
3281                 txr = &adapter->tx_rings[i];
3282                 txr->adapter = adapter;
3283                 txr->me = i;
3284
3285                 /* Initialize the TX lock */
3286                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3287                     device_get_nameunit(dev), txr->me);
3288                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3289
3290                 if (igb_dma_malloc(adapter, tsize,
3291                         &txr->txdma, BUS_DMA_NOWAIT)) {
3292                         device_printf(dev,
3293                             "Unable to allocate TX Descriptor memory\n");
3294                         error = ENOMEM;
3295                         goto err_tx_desc;
3296                 }
3297                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3298                 bzero((void *)txr->tx_base, tsize);
3299
3300                 /* Now allocate transmit buffers for the ring */
3301                 if (igb_allocate_transmit_buffers(txr)) {
3302                         device_printf(dev,
3303                             "Critical Failure setting up transmit buffers\n");
3304                         error = ENOMEM;
3305                         goto err_tx_desc;
3306                 }
3307 #if __FreeBSD_version >= 800000
3308                 /* Allocate a buf ring */
3309                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3310                     M_WAITOK, &txr->tx_mtx);
3311 #endif
3312         }
3313
3314         /*
3315          * Next the RX queues...
3316          */ 
3317         rsize = roundup2(adapter->num_rx_desc *
3318             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3319         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3320                 rxr = &adapter->rx_rings[i];
3321                 rxr->adapter = adapter;
3322                 rxr->me = i;
3323
3324                 /* Initialize the RX lock */
3325                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
            device_get_nameunit(dev), rxr->me);
3327                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3328
3329                 if (igb_dma_malloc(adapter, rsize,
3330                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3331                         device_printf(dev,
                            "Unable to allocate RX Descriptor memory\n");
3333                         error = ENOMEM;
3334                         goto err_rx_desc;
3335                 }
3336                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3337                 bzero((void *)rxr->rx_base, rsize);
3338
                /* Allocate receive buffers for the ring */
3340                 if (igb_allocate_receive_buffers(rxr)) {
3341                         device_printf(dev,
3342                             "Critical Failure setting up receive buffers\n");
3343                         error = ENOMEM;
3344                         goto err_rx_desc;
3345                 }
3346         }
3347
3348         /*
3349         ** Finally set up the queue holding structs
3350         */
3351         for (int i = 0; i < adapter->num_queues; i++) {
3352                 que = &adapter->queues[i];
3353                 que->adapter = adapter;
3354                 que->txr = &adapter->tx_rings[i];
3355                 que->rxr = &adapter->rx_rings[i];
3356         }
3357
3358         return (0);
3359
3360 err_rx_desc:
3361         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3362                 igb_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
        for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
#if __FreeBSD_version >= 800000
                /* Release the buf ring if this TX ring allocated one */
                if (txr->br != NULL)
                        buf_ring_free(txr->br, M_DEVBUF);
#endif
                igb_dma_free(adapter, &txr->txdma);
        }
        free(adapter->rx_rings, M_DEVBUF);
rx_fail:
3371         free(adapter->tx_rings, M_DEVBUF);
3372 tx_fail:
3373         free(adapter->queues, M_DEVBUF);
3374 fail:
3375         return (error);
3376 }
3377
3378 /*********************************************************************
3379  *
3380  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3381  *  the information needed to transmit a packet on the wire. This is
3382  *  called only once at attach, setup is done every reset.
3383  *
3384  **********************************************************************/
3385 static int
3386 igb_allocate_transmit_buffers(struct tx_ring *txr)
3387 {
3388         struct adapter *adapter = txr->adapter;
3389         device_t dev = adapter->dev;
3390         struct igb_tx_buffer *txbuf;
3391         int error, i;
3392
3393         /*
3394          * Setup DMA descriptor areas.
3395          */
3396         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3397                                1, 0,                    /* alignment, bounds */
3398                                BUS_SPACE_MAXADDR,       /* lowaddr */
3399                                BUS_SPACE_MAXADDR,       /* highaddr */
3400                                NULL, NULL,              /* filter, filterarg */
3401                                IGB_TSO_SIZE,            /* maxsize */
3402                                IGB_MAX_SCATTER,         /* nsegments */
3403                                PAGE_SIZE,               /* maxsegsize */
3404                                0,                       /* flags */
3405                                NULL,                    /* lockfunc */
3406                                NULL,                    /* lockfuncarg */
3407                                &txr->txtag))) {
                device_printf(dev, "Unable to allocate TX DMA tag\n");
3409                 goto fail;
3410         }
3411
3412         if (!(txr->tx_buffers =
3413             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3414             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3415                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3416                 error = ENOMEM;
3417                 goto fail;
3418         }
3419
3420         /* Create the descriptor buffer dma maps */
3421         txbuf = txr->tx_buffers;
3422         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3423                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3424                 if (error != 0) {
3425                         device_printf(dev, "Unable to create TX DMA map\n");
3426                         goto fail;
3427                 }
3428         }
3429
        return (0);
3431 fail:
        /* We free all; this handles the case where we failed partway through */
3433         igb_free_transmit_structures(adapter);
3434         return (error);
3435 }
3436
3437 /*********************************************************************
3438  *
3439  *  Initialize a transmit ring.
3440  *
3441  **********************************************************************/
3442 static void
3443 igb_setup_transmit_ring(struct tx_ring *txr)
3444 {
3445         struct adapter *adapter = txr->adapter;
3446         struct igb_tx_buffer *txbuf;
3447         int i;
3448 #ifdef DEV_NETMAP
3449         struct netmap_adapter *na = NA(adapter->ifp);
3450         struct netmap_slot *slot;
3451 #endif /* DEV_NETMAP */
3452
3453         /* Clear the old descriptor contents */
3454         IGB_TX_LOCK(txr);
3455 #ifdef DEV_NETMAP
3456         slot = netmap_reset(na, NR_TX, txr->me, 0);
3457 #endif /* DEV_NETMAP */
3458         bzero((void *)txr->tx_base,
3459               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3460         /* Reset indices */
3461         txr->next_avail_desc = 0;
3462         txr->next_to_clean = 0;
3463
3464         /* Free any existing tx buffers. */
3465         txbuf = txr->tx_buffers;
3466         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3467                 if (txbuf->m_head != NULL) {
3468                         bus_dmamap_sync(txr->txtag, txbuf->map,
3469                             BUS_DMASYNC_POSTWRITE);
3470                         bus_dmamap_unload(txr->txtag, txbuf->map);
3471                         m_freem(txbuf->m_head);
3472                         txbuf->m_head = NULL;
3473                 }
3474 #ifdef DEV_NETMAP
3475                 if (slot) {
3476                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3477                         /* no need to set the address */
3478                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3479                 }
3480 #endif /* DEV_NETMAP */
3481                 /* clear the watch index */
3482                 txbuf->next_eop = -1;
3483         }
3484
3485         /* Set number of descriptors available */
3486         txr->tx_avail = adapter->num_tx_desc;
3487
3488         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3489             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3490         IGB_TX_UNLOCK(txr);
3491 }
3492
3493 /*********************************************************************
3494  *
3495  *  Initialize all transmit rings.
3496  *
3497  **********************************************************************/
3498 static void
3499 igb_setup_transmit_structures(struct adapter *adapter)
3500 {
3501         struct tx_ring *txr = adapter->tx_rings;
3502
3503         for (int i = 0; i < adapter->num_queues; i++, txr++)
3504                 igb_setup_transmit_ring(txr);
3505
3506         return;
3507 }
3508
3509 /*********************************************************************
3510  *
3511  *  Enable transmit unit.
3512  *
3513  **********************************************************************/
3514 static void
3515 igb_initialize_transmit_units(struct adapter *adapter)
3516 {
3517         struct tx_ring  *txr = adapter->tx_rings;
3518         struct e1000_hw *hw = &adapter->hw;
3519         u32             tctl, txdctl;
3520
3521         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3522         tctl = txdctl = 0;
3523
3524         /* Setup the Tx Descriptor Rings */
3525         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3526                 u64 bus_addr = txr->txdma.dma_paddr;
3527
3528                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3529                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3530                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3531                     (uint32_t)(bus_addr >> 32));
3532                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3533                     (uint32_t)bus_addr);
3534
3535                 /* Setup the HW Tx Head and Tail descriptor pointers */
3536                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3537                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3538
3539                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3540                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3541                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3542
3543                 txr->queue_status = IGB_QUEUE_IDLE;
3544
3545                 txdctl |= IGB_TX_PTHRESH;
3546                 txdctl |= IGB_TX_HTHRESH << 8;
3547                 txdctl |= IGB_TX_WTHRESH << 16;
3548                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3549                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3550         }
3551
3552         if (adapter->vf_ifp)
3553                 return;
3554
3555         e1000_config_collision_dist(hw);
3556
3557         /* Program the Transmit Control Register */
3558         tctl = E1000_READ_REG(hw, E1000_TCTL);
3559         tctl &= ~E1000_TCTL_CT;
3560         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3561                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3562
3563         /* This write will effectively turn on the transmit unit. */
3564         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3565 }
3566
3567 /*********************************************************************
3568  *
3569  *  Free all transmit rings.
3570  *
3571  **********************************************************************/
3572 static void
3573 igb_free_transmit_structures(struct adapter *adapter)
3574 {
3575         struct tx_ring *txr = adapter->tx_rings;
3576
3577         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3578                 IGB_TX_LOCK(txr);
3579                 igb_free_transmit_buffers(txr);
3580                 igb_dma_free(adapter, &txr->txdma);
3581                 IGB_TX_UNLOCK(txr);
3582                 IGB_TX_LOCK_DESTROY(txr);
3583         }
3584         free(adapter->tx_rings, M_DEVBUF);
3585 }
3586
3587 /*********************************************************************
3588  *
3589  *  Free transmit ring related data structures.
3590  *
3591  **********************************************************************/
3592 static void
3593 igb_free_transmit_buffers(struct tx_ring *txr)
3594 {
3595         struct adapter *adapter = txr->adapter;
3596         struct igb_tx_buffer *tx_buffer;
3597         int             i;
3598
        INIT_DEBUGOUT("igb_free_transmit_buffers: begin");
3600
3601         if (txr->tx_buffers == NULL)
3602                 return;
3603
3604         tx_buffer = txr->tx_buffers;
3605         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3606                 if (tx_buffer->m_head != NULL) {
3607                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3608                             BUS_DMASYNC_POSTWRITE);
3609                         bus_dmamap_unload(txr->txtag,
3610                             tx_buffer->map);
3611                         m_freem(tx_buffer->m_head);
3612                         tx_buffer->m_head = NULL;
3613                         if (tx_buffer->map != NULL) {
3614                                 bus_dmamap_destroy(txr->txtag,
3615                                     tx_buffer->map);
3616                                 tx_buffer->map = NULL;
3617                         }
3618                 } else if (tx_buffer->map != NULL) {
3619                         bus_dmamap_unload(txr->txtag,
3620                             tx_buffer->map);
3621                         bus_dmamap_destroy(txr->txtag,
3622                             tx_buffer->map);
3623                         tx_buffer->map = NULL;
3624                 }
3625         }
3626 #if __FreeBSD_version >= 800000
3627         if (txr->br != NULL)
3628                 buf_ring_free(txr->br, M_DEVBUF);
3629 #endif
3630         if (txr->tx_buffers != NULL) {
3631                 free(txr->tx_buffers, M_DEVBUF);
3632                 txr->tx_buffers = NULL;
3633         }
3634         if (txr->txtag != NULL) {
3635                 bus_dma_tag_destroy(txr->txtag);
3636                 txr->txtag = NULL;
3637         }
3638         return;
3639 }
3640
3641 /**********************************************************************
3642  *
3643  *  Setup work for hardware segmentation offload (TSO)
3644  *
3645  **********************************************************************/
3646 static bool
3647 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3648         struct ip *ip, struct tcphdr *th)
3649 {
3650         struct adapter *adapter = txr->adapter;
3651         struct e1000_adv_tx_context_desc *TXD;
3652         struct igb_tx_buffer        *tx_buffer;
3653         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3654         u32 mss_l4len_idx = 0;
3655         u16 vtag = 0;
3656         int ctxd, ip_hlen, tcp_hlen;
3657
3658         ctxd = txr->next_avail_desc;
3659         tx_buffer = &txr->tx_buffers[ctxd];
3660         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3661
3662         ip->ip_sum = 0;
3663         ip_hlen = ip->ip_hl << 2;
3664         tcp_hlen = th->th_off << 2;
3665
3666         /* VLAN MACLEN IPLEN */
3667         if (mp->m_flags & M_VLANTAG) {
3668                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3669                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3670         }
3671
3672         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3673         vlan_macip_lens |= ip_hlen;
3674         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3675
3676         /* ADV DTYPE TUCMD */
3677         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3678         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3679         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3680         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3681
3682         /* MSS L4LEN IDX */
3683         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3684         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3685         /* 82575 needs the queue index added */
3686         if (adapter->hw.mac.type == e1000_82575)
3687                 mss_l4len_idx |= txr->me << 4;
3688         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
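        /*
        ** Illustrative layout, assuming the usual advanced context
        ** descriptor shifts (MSS in bits 31:16, L4LEN in 15:8,
        ** index in 7:4): tso_segsz = 1448 with a 32-byte TCP header
        ** packs as (1448 << 16) | (32 << 8), plus the queue index
        ** nibble on the 82575.
        */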
3689
3690         TXD->seqnum_seed = htole32(0);
3691         tx_buffer->m_head = NULL;
3692         tx_buffer->next_eop = -1;
3693
3694         if (++ctxd == adapter->num_tx_desc)
3695                 ctxd = 0;
3696
3697         txr->tx_avail--;
3698         txr->next_avail_desc = ctxd;
3699         return TRUE;
3700 }
3701
3702
3703 /*********************************************************************
3704  *
3705  *  Context Descriptor setup for VLAN or CSUM
3706  *
3707  **********************************************************************/
3708
3709 static bool
3710 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3711 {
3712         struct adapter *adapter = txr->adapter;
3713         struct e1000_adv_tx_context_desc *TXD;
3714         struct igb_tx_buffer        *tx_buffer;
3715         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3716         struct ether_vlan_header *eh;
3717         struct ip *ip = NULL;
3718         struct ip6_hdr *ip6;
3719         int  ehdrlen, ctxd, ip_hlen = 0;
3720         u16     etype, vtag = 0;
3721         u8      ipproto = 0;
3722         bool    offload = TRUE;
3723
3724         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3725                 offload = FALSE;
3726
3727         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3728         ctxd = txr->next_avail_desc;
3729         tx_buffer = &txr->tx_buffers[ctxd];
3730         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3731
3732         /*
3733         ** In advanced descriptors the vlan tag must 
3734         ** be placed into the context descriptor, thus
3735         ** we need to be here just for that setup.
3736         */
3737         if (mp->m_flags & M_VLANTAG) {
3738                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3739                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3740         } else if (offload == FALSE)
3741                 return FALSE;
3742
3743         /*
3744          * Determine where frame payload starts.
3745          * Jump over vlan headers if already present,
3746          * helpful for QinQ too.
3747          */
3748         eh = mtod(mp, struct ether_vlan_header *);
3749         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3750                 etype = ntohs(eh->evl_proto);
3751                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3752         } else {
3753                 etype = ntohs(eh->evl_encap_proto);
3754                 ehdrlen = ETHER_HDR_LEN;
3755         }
3756
3757         /* Set the ether header length */
3758         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3759
3760         switch (etype) {
3761                 case ETHERTYPE_IP:
3762                         ip = (struct ip *)(mp->m_data + ehdrlen);
3763                         ip_hlen = ip->ip_hl << 2;
3764                         if (mp->m_len < ehdrlen + ip_hlen) {
3765                                 offload = FALSE;
3766                                 break;
3767                         }
3768                         ipproto = ip->ip_p;
3769                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3770                         break;
3771                 case ETHERTYPE_IPV6:
3772                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3773                         ip_hlen = sizeof(struct ip6_hdr);
3774                         ipproto = ip6->ip6_nxt;
3775                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3776                         break;
3777                 default:
3778                         offload = FALSE;
3779                         break;
3780         }
3781
3782         vlan_macip_lens |= ip_hlen;
3783         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3784
3785         switch (ipproto) {
3786                 case IPPROTO_TCP:
3787                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3788                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3789                         break;
3790                 case IPPROTO_UDP:
3791                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3792                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3793                         break;
3794 #if __FreeBSD_version >= 800000
3795                 case IPPROTO_SCTP:
3796                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3797                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3798                         break;
3799 #endif
3800                 default:
3801                         offload = FALSE;
3802                         break;
3803         }
3804
3805         /* 82575 needs the queue index added */
3806         if (adapter->hw.mac.type == e1000_82575)
3807                 mss_l4len_idx = txr->me << 4;
3808
3809         /* Now copy bits into descriptor */
3810         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3811         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3812         TXD->seqnum_seed = htole32(0);
3813         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3814
3815         tx_buffer->m_head = NULL;
3816         tx_buffer->next_eop = -1;
3817
3818         /* We've consumed the first desc, adjust counters */
3819         if (++ctxd == adapter->num_tx_desc)
3820                 ctxd = 0;
3821         txr->next_avail_desc = ctxd;
3822         --txr->tx_avail;
3823
3824         return (offload);
3825 }
3826
3827
3828 /**********************************************************************
3829  *
3830  *  Examine each tx_buffer in the used queue. If the hardware is done
3831  *  processing the packet then free associated resources. The
3832  *  tx_buffer is put back on the free queue.
3833  *
 *  A TRUE return means there is work in the ring to clean; FALSE means it is empty.
3835  **********************************************************************/
3836 static bool
3837 igb_txeof(struct tx_ring *txr)
3838 {
3839         struct adapter  *adapter = txr->adapter;
3840         int first, last, done, processed;
3841         struct igb_tx_buffer *tx_buffer;
3842         struct e1000_tx_desc   *tx_desc, *eop_desc;
3843         struct ifnet   *ifp = adapter->ifp;
3844
3845         IGB_TX_LOCK_ASSERT(txr);
3846
3847 #ifdef DEV_NETMAP
3848         if (ifp->if_capenable & IFCAP_NETMAP) {
3849                 struct netmap_adapter *na = NA(ifp);
3850
3851                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3852                 IGB_TX_UNLOCK(txr);
3853                 IGB_CORE_LOCK(adapter);
3854                 selwakeuppri(&na->tx_si, PI_NET);
3855                 IGB_CORE_UNLOCK(adapter);
3856                 IGB_TX_LOCK(txr);
3857                 return FALSE;
3858         }
3859 #endif /* DEV_NETMAP */
3860         if (txr->tx_avail == adapter->num_tx_desc) {
3861                 txr->queue_status = IGB_QUEUE_IDLE;
3862                 return FALSE;
3863         }
3864
3865         processed = 0;
3866         first = txr->next_to_clean;
3867         tx_desc = &txr->tx_base[first];
3868         tx_buffer = &txr->tx_buffers[first];
3869         last = tx_buffer->next_eop;
3870         eop_desc = &txr->tx_base[last];
3871
        /*
         * Get the index of the first descriptor AFTER the EOP
         * of the first packet, so that the comparison in the
         * inner while loop below stays simple.
         */
3878         if (++last == adapter->num_tx_desc)
3879                 last = 0;
3880         done = last;
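        /*
        ** Example (illustrative): if next_to_clean is 10 and the
        ** first packet's EOP sits in descriptor 12, done becomes 13
        ** and the inner loop below reclaims slots 10 through 12
        ** before checking whether the next packet completed too.
        */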
3881
3882         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3883             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3884
3885         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3886                 /* We clean the range of the packet */
3887                 while (first != done) {
3888                         tx_desc->upper.data = 0;
3889                         tx_desc->lower.data = 0;
3890                         tx_desc->buffer_addr = 0;
3891                         ++txr->tx_avail;
3892                         ++processed;
3893
3894                         if (tx_buffer->m_head) {
3895                                 txr->bytes +=
3896                                     tx_buffer->m_head->m_pkthdr.len;
3897                                 bus_dmamap_sync(txr->txtag,
3898                                     tx_buffer->map,
3899                                     BUS_DMASYNC_POSTWRITE);
3900                                 bus_dmamap_unload(txr->txtag,
3901                                     tx_buffer->map);
3902
3903                                 m_freem(tx_buffer->m_head);
3904                                 tx_buffer->m_head = NULL;
3905                         }
3906                         tx_buffer->next_eop = -1;
3907                         txr->watchdog_time = ticks;
3908
3909                         if (++first == adapter->num_tx_desc)
3910                                 first = 0;
3911
3912                         tx_buffer = &txr->tx_buffers[first];
3913                         tx_desc = &txr->tx_base[first];
3914                 }
3915                 ++txr->packets;
3916                 ++ifp->if_opackets;
3917                 /* See if we can continue to the next packet */
3918                 last = tx_buffer->next_eop;
3919                 if (last != -1) {
3920                         eop_desc = &txr->tx_base[last];
3921                         /* Get new done point */
3922                         if (++last == adapter->num_tx_desc) last = 0;
3923                         done = last;
3924                 } else
3925                         break;
3926         }
3927         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3928             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3929
3930         txr->next_to_clean = first;
3931
        /*
        ** Watchdog calculation: we know there is work
        ** outstanding or the first return above would have
        ** been taken, so nothing processed for too long
        ** indicates a hang.
        */
3938         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3939                 txr->queue_status |= IGB_QUEUE_HUNG;
3940         /*
3941          * If we have a minimum free,
3942          * clear depleted state bit
3943          */
3944         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)          
3945                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3946
3947         /* All clean, turn off the watchdog */
3948         if (txr->tx_avail == adapter->num_tx_desc) {
3949                 txr->queue_status = IGB_QUEUE_IDLE;
3950                 return (FALSE);
3951         }
3952
3953         return (TRUE);
3954 }
3955
3956 /*********************************************************************
3957  *
3958  *  Refresh mbuf buffers for RX descriptor rings
3959  *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be called again later to retry.
3963  *
3964  **********************************************************************/
3965 static void
3966 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3967 {
3968         struct adapter          *adapter = rxr->adapter;
3969         bus_dma_segment_t       hseg[1];
3970         bus_dma_segment_t       pseg[1];
3971         struct igb_rx_buf       *rxbuf;
3972         struct mbuf             *mh, *mp;
3973         int                     i, j, nsegs, error;
3974         bool                    refreshed = FALSE;
3975
3976         i = j = rxr->next_to_refresh;
3977         /*
3978         ** Get one descriptor beyond
3979         ** our work mark to control
3980         ** the loop.
3981         */
3982         if (++j == adapter->num_rx_desc)
3983                 j = 0;
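        /*
        ** Example (illustrative): with next_to_refresh = 5 we start
        ** with i = 5 and j = 6; each pass refreshes slot i and only
        ** then advances next_to_refresh, so if an mbuf allocation
        ** fails, next_to_refresh still names the slot to retry on
        ** the next call.
        */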
3984
3985         while (j != limit) {
3986                 rxbuf = &rxr->rx_buffers[i];
3987                 /* No hdr mbuf used with header split off */
3988                 if (rxr->hdr_split == FALSE)
3989                         goto no_split;
3990                 if (rxbuf->m_head == NULL) {
3991                         mh = m_gethdr(M_DONTWAIT, MT_DATA);
3992                         if (mh == NULL)
3993                                 goto update;
3994                 } else
3995                         mh = rxbuf->m_head;
3996
                mh->m_pkthdr.len = mh->m_len = MHLEN;
3999                 mh->m_flags |= M_PKTHDR;
4000                 /* Get the memory mapping */
4001                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4002                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4003                 if (error != 0) {
4004                         printf("Refresh mbufs: hdr dmamap load"
4005                             " failure - %d\n", error);
4006                         m_free(mh);
4007                         rxbuf->m_head = NULL;
4008                         goto update;
4009                 }
4010                 rxbuf->m_head = mh;
4011                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4012                     BUS_DMASYNC_PREREAD);
4013                 rxr->rx_base[i].read.hdr_addr =
4014                     htole64(hseg[0].ds_addr);
4015 no_split:
4016                 if (rxbuf->m_pack == NULL) {
4017                         mp = m_getjcl(M_DONTWAIT, MT_DATA,
4018                             M_PKTHDR, adapter->rx_mbuf_sz);
4019                         if (mp == NULL)
4020                                 goto update;
4021                 } else
4022                         mp = rxbuf->m_pack;
4023
4024                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4025                 /* Get the memory mapping */
4026                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4027                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4028                 if (error != 0) {
4029                         printf("Refresh mbufs: payload dmamap load"
4030                             " failure - %d\n", error);
4031                         m_free(mp);
4032                         rxbuf->m_pack = NULL;
4033                         goto update;
4034                 }
4035                 rxbuf->m_pack = mp;
4036                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4037                     BUS_DMASYNC_PREREAD);
4038                 rxr->rx_base[i].read.pkt_addr =
4039                     htole64(pseg[0].ds_addr);
4040                 refreshed = TRUE; /* I feel wefreshed :) */
4041
4042                 i = j; /* our next is precalculated */
4043                 rxr->next_to_refresh = i;
4044                 if (++j == adapter->num_rx_desc)
4045                         j = 0;
4046         }
4047 update:
4048         if (refreshed) /* update tail */
4049                 E1000_WRITE_REG(&adapter->hw,
4050                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4051         return;
4052 }
4053
4054
4055 /*********************************************************************
4056  *
4057  *  Allocate memory for rx_buffer structures. Since we use one
4058  *  rx_buffer per received packet, the maximum number of rx_buffer's
4059  *  that we'll need is equal to the number of receive descriptors
4060  *  that we've allocated.
4061  *
4062  **********************************************************************/
4063 static int
4064 igb_allocate_receive_buffers(struct rx_ring *rxr)
4065 {
4066         struct  adapter         *adapter = rxr->adapter;
4067         device_t                dev = adapter->dev;
4068         struct igb_rx_buf       *rxbuf;
4069         int                     i, bsize, error;
4070
4071         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4072         if (!(rxr->rx_buffers =
4073             (struct igb_rx_buf *) malloc(bsize,
4074             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4075                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4076                 error = ENOMEM;
4077                 goto fail;
4078         }
4079
4080         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4081                                    1, 0,                /* alignment, bounds */
4082                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4083                                    BUS_SPACE_MAXADDR,   /* highaddr */
4084                                    NULL, NULL,          /* filter, filterarg */
4085                                    MSIZE,               /* maxsize */
4086                                    1,                   /* nsegments */
4087                                    MSIZE,               /* maxsegsize */
4088                                    0,                   /* flags */
4089                                    NULL,                /* lockfunc */
4090                                    NULL,                /* lockfuncarg */
4091                                    &rxr->htag))) {
4092                 device_printf(dev, "Unable to create RX DMA tag\n");
4093                 goto fail;
4094         }
4095
4096         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4097                                    1, 0,                /* alignment, bounds */
4098                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4099                                    BUS_SPACE_MAXADDR,   /* highaddr */
4100                                    NULL, NULL,          /* filter, filterarg */
4101                                    MJUM9BYTES,          /* maxsize */
4102                                    1,                   /* nsegments */
4103                                    MJUM9BYTES,          /* maxsegsize */
4104                                    0,                   /* flags */
4105                                    NULL,                /* lockfunc */
4106                                    NULL,                /* lockfuncarg */
4107                                    &rxr->ptag))) {
4108                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4109                 goto fail;
4110         }
4111
4112         for (i = 0; i < adapter->num_rx_desc; i++) {
4113                 rxbuf = &rxr->rx_buffers[i];
4114                 error = bus_dmamap_create(rxr->htag,
4115                     BUS_DMA_NOWAIT, &rxbuf->hmap);
4116                 if (error) {
4117                         device_printf(dev,
4118                             "Unable to create RX head DMA maps\n");
4119                         goto fail;
4120                 }
4121                 error = bus_dmamap_create(rxr->ptag,
4122                     BUS_DMA_NOWAIT, &rxbuf->pmap);
4123                 if (error) {
4124                         device_printf(dev,
4125                             "Unable to create RX packet DMA maps\n");
4126                         goto fail;
4127                 }
4128         }
4129
4130         return (0);
4131
4132 fail:
4133         /* Frees all, but can handle partial completion */
4134         igb_free_receive_structures(adapter);
4135         return (error);
4136 }
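/*
 * Illustrative sketch (not compiled into the driver): the busdma
 * lifecycle used above, in isolation.  A tag describes the DMA
 * constraints for a class of buffers; each buffer then gets its own
 * map.  Teardown (see igb_free_receive_buffers() below) mirrors this
 * in reverse: unload and destroy the maps, then destroy the tag.
 * The function name here is hypothetical.
 */
#if 0
static int
example_dma_setup(device_t dev, bus_dma_tag_t *tag, bus_dmamap_t *map)
{
        int error;

        /* One tag per buffer class: single segment, up to MSIZE bytes */
        error = bus_dma_tag_create(bus_get_dma_tag(dev),
            1, 0,                       /* alignment, bounds */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            MSIZE, 1, MSIZE,            /* maxsize, nsegments, maxsegsize */
            0, NULL, NULL, tag);
        if (error)
                return (error);
        /* One map per buffer; destroyed before the tag on teardown */
        error = bus_dmamap_create(*tag, BUS_DMA_NOWAIT, map);
        if (error)
                bus_dma_tag_destroy(*tag);
        return (error);
}
#endif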
4137
4138
4139 static void
4140 igb_free_receive_ring(struct rx_ring *rxr)
4141 {
4142         struct  adapter         *adapter = rxr->adapter;
4143         struct igb_rx_buf       *rxbuf;
4144
4145
4146         for (int i = 0; i < adapter->num_rx_desc; i++) {
4147                 rxbuf = &rxr->rx_buffers[i];
4148                 if (rxbuf->m_head != NULL) {
4149                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4150                             BUS_DMASYNC_POSTREAD);
4151                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4152                         rxbuf->m_head->m_flags |= M_PKTHDR;
4153                         m_freem(rxbuf->m_head);
4154                 }
4155                 if (rxbuf->m_pack != NULL) {
4156                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4157                             BUS_DMASYNC_POSTREAD);
4158                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4159                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4160                         m_freem(rxbuf->m_pack);
4161                 }
4162                 rxbuf->m_head = NULL;
4163                 rxbuf->m_pack = NULL;
4164         }
4165 }
4166
4167
4168 /*********************************************************************
4169  *
4170  *  Initialize a receive ring and its buffers.
4171  *
4172  **********************************************************************/
4173 static int
4174 igb_setup_receive_ring(struct rx_ring *rxr)
4175 {
4176         struct  adapter         *adapter;
4177         struct  ifnet           *ifp;
4178         device_t                dev;
4179         struct igb_rx_buf       *rxbuf;
4180         bus_dma_segment_t       pseg[1], hseg[1];
4181         struct lro_ctrl         *lro = &rxr->lro;
4182         int                     rsize, nsegs, error = 0;
4183 #ifdef DEV_NETMAP
4184         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4185         struct netmap_slot *slot;
4186 #endif /* DEV_NETMAP */
4187
4188         adapter = rxr->adapter;
4189         dev = adapter->dev;
4190         ifp = adapter->ifp;
4191
4192         /* Clear the ring contents */
4193         IGB_RX_LOCK(rxr);
4194 #ifdef DEV_NETMAP
4195         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4196 #endif /* DEV_NETMAP */
4197         rsize = roundup2(adapter->num_rx_desc *
4198             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4199         bzero((void *)rxr->rx_base, rsize);
4200
4201         /*
4202         ** Free current RX buffer structures and their mbufs
4203         */
4204         igb_free_receive_ring(rxr);
4205
4206         /* Configure for header split? */
4207         if (igb_header_split)
4208                 rxr->hdr_split = TRUE;
4209
4210         /* Now replenish the ring mbufs */
4211         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4212                 struct mbuf     *mh, *mp;
4213
4214                 rxbuf = &rxr->rx_buffers[j];
4215 #ifdef DEV_NETMAP
4216                 if (slot) {
4217                         /* slot sj is mapped to the i-th NIC-ring entry */
4218                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4219                         uint64_t paddr;
4220                         void *addr;
4221
4222                         addr = PNMB(slot + sj, &paddr);
4223                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4224                         /* Update descriptor */
4225                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4226                         continue;
4227                 }
4228 #endif /* DEV_NETMAP */
4229                 if (rxr->hdr_split == FALSE)
4230                         goto skip_head;
4231
4232                 /* First the header */
4233                 rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4234                 if (rxbuf->m_head == NULL) {
4235                         error = ENOBUFS;
4236                         goto fail;
4237                 }
4238                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4239                 mh = rxbuf->m_head;
4240                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4241                 mh->m_flags |= M_PKTHDR;
4242                 /* Get the memory mapping */
4243                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4244                     rxbuf->hmap, rxbuf->m_head, hseg,
4245                     &nsegs, BUS_DMA_NOWAIT);
4246                 if (error != 0) /* Nothing elegant to do here */
4247                         goto fail;
4248                 bus_dmamap_sync(rxr->htag,
4249                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4250                 /* Update descriptor */
4251                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4252
4253 skip_head:
4254                 /* Now the payload cluster */
4255                 rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4256                     M_PKTHDR, adapter->rx_mbuf_sz);
4257                 if (rxbuf->m_pack == NULL) {
4258                         error = ENOBUFS;
4259                         goto fail;
4260                 }
4261                 mp = rxbuf->m_pack;
4262                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4263                 /* Get the memory mapping */
4264                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4265                     rxbuf->pmap, mp, pseg,
4266                     &nsegs, BUS_DMA_NOWAIT);
4267                 if (error != 0)
4268                         goto fail;
4269                 bus_dmamap_sync(rxr->ptag,
4270                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4271                 /* Update descriptor */
4272                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4273         }
4274
4275         /* Setup our descriptor indices */
4276         rxr->next_to_check = 0;
4277         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4278         rxr->lro_enabled = FALSE;
4279         rxr->rx_split_packets = 0;
4280         rxr->rx_bytes = 0;
4281
4282         rxr->fmp = NULL;
4283         rxr->lmp = NULL;
4284         rxr->discard = FALSE;
4285
4286         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4287             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4288
4289         /*
4290         ** Now set up the LRO interface; we also
4291         ** only do header split when LRO is
4292         ** enabled, since the two together are
4293         ** undesirable in most setups.
4294         */
4295         if (ifp->if_capenable & IFCAP_LRO) {
4296                 error = tcp_lro_init(lro);
4297                 if (error) {
4298                         device_printf(dev, "LRO Initialization failed!\n");
4299                         goto fail;
4300                 }
4301                 INIT_DEBUGOUT("RX LRO Initialized\n");
4302                 rxr->lro_enabled = TRUE;
4303                 lro->ifp = adapter->ifp;
4304         }
4305
4306         IGB_RX_UNLOCK(rxr);
4307         return (0);
4308
4309 fail:
4310         igb_free_receive_ring(rxr);
4311         IGB_RX_UNLOCK(rxr);
4312         return (error);
4313 }
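/*
 * Illustrative sketch (not compiled): the per-buffer replenish step
 * from the loop above, in isolation.  A cluster mbuf is allocated,
 * loaded into a single DMA segment, and its bus address handed to the
 * advanced descriptor.  The function name is hypothetical.
 */
#if 0
static int
example_replenish_one(struct rx_ring *rxr, int j, int clsize)
{
        struct igb_rx_buf       *rxbuf = &rxr->rx_buffers[j];
        bus_dma_segment_t       seg[1];
        struct mbuf             *mp;
        int                     error, nsegs;

        mp = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, clsize);
        if (mp == NULL)
                return (ENOBUFS);
        mp->m_pkthdr.len = mp->m_len = clsize;
        error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
            mp, seg, &nsegs, BUS_DMA_NOWAIT);
        if (error != 0) {
                m_freem(mp);
                return (error);
        }
        bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
        rxbuf->m_pack = mp;
        rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
        return (0);
}
#endif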
4314
4315
4316 /*********************************************************************
4317  *
4318  *  Initialize all receive rings.
4319  *
4320  **********************************************************************/
4321 static int
4322 igb_setup_receive_structures(struct adapter *adapter)
4323 {
4324         struct rx_ring *rxr = adapter->rx_rings;
4325         int i;
4326
4327         for (i = 0; i < adapter->num_queues; i++, rxr++)
4328                 if (igb_setup_receive_ring(rxr))
4329                         goto fail;
4330
4331         return (0);
4332 fail:
4333         /*
4334          * Free RX buffers allocated so far, we will only handle
4335          * the rings that completed, the failing case will have
4336          * cleaned up for itself. 'i' is the endpoint.
4337          */
4338         for (int j = 0; j < i; ++j) {
4339                 rxr = &adapter->rx_rings[j];
4340                 IGB_RX_LOCK(rxr);
4341                 igb_free_receive_ring(rxr);
4342                 IGB_RX_UNLOCK(rxr);
4343         }
4344
4345         return (ENOBUFS);
4346 }
4347
4348 /*********************************************************************
4349  *
4350  *  Enable receive unit.
4351  *
4352  **********************************************************************/
4353 static void
4354 igb_initialize_receive_units(struct adapter *adapter)
4355 {
4356         struct rx_ring  *rxr = adapter->rx_rings;
4357         struct ifnet    *ifp = adapter->ifp;
4358         struct e1000_hw *hw = &adapter->hw;
4359         u32             rctl, rxcsum, psize, srrctl = 0;
4360
4361         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4362
4363         /*
4364          * Make sure receives are disabled while setting
4365          * up the descriptor ring
4366          */
4367         rctl = E1000_READ_REG(hw, E1000_RCTL);
4368         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4369
4370         /*
4371         ** Set up for header split
4372         */
4373         if (igb_header_split) {
4374                 /* Use a standard mbuf for the header */
4375                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4376                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4377         } else
4378                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4379
4380         /*
4381         ** Set up for jumbo frames
4382         */
4383         if (ifp->if_mtu > ETHERMTU) {
4384                 rctl |= E1000_RCTL_LPE;
4385                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4386                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4387                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4388                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4389                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4390                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4391                 }
4392                 /* Set maximum packet len */
4393                 psize = adapter->max_frame_size;
4394                 /* are we on a vlan? */
4395                 if (adapter->ifp->if_vlantrunk != NULL)
4396                         psize += VLAN_TAG_SIZE;
4397                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4398         } else {
4399                 rctl &= ~E1000_RCTL_LPE;
4400                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4401                 rctl |= E1000_RCTL_SZ_2048;
4402         }
4403
4404         /* Setup the Base and Length of the Rx Descriptor Rings */
4405         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4406                 u64 bus_addr = rxr->rxdma.dma_paddr;
4407                 u32 rxdctl;
4408
4409                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4410                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4411                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4412                     (uint32_t)(bus_addr >> 32));
4413                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4414                     (uint32_t)bus_addr);
4415                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4416                 /* Enable this Queue */
4417                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4418                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4419                 rxdctl &= 0xFFF00000;
4420                 rxdctl |= IGB_RX_PTHRESH;
4421                 rxdctl |= IGB_RX_HTHRESH << 8;
4422                 rxdctl |= IGB_RX_WTHRESH << 16;
4423                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4424         }
4425
4426         /*
4427         ** Setup for RX MultiQueue
4428         */
4429         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4430         if (adapter->num_queues > 1) {
4431                 u32 random[10], mrqc, shift = 0;
4432                 union igb_reta {
4433                         u32 dword;
4434                         u8  bytes[4];
4435                 } reta;
4436
4437                 arc4rand(&random, sizeof(random), 0);
4438                 if (adapter->hw.mac.type == e1000_82575)
4439                         shift = 6;
4440                 /* Warning FM follows */
4441                 for (int i = 0; i < 128; i++) {
4442                         reta.bytes[i & 3] =
4443                             (i % adapter->num_queues) << shift;
4444                         if ((i & 3) == 3)
4445                                 E1000_WRITE_REG(hw,
4446                                     E1000_RETA(i >> 2), reta.dword);
4447                 }
4448                 /* Now fill in the RSS hash key */
4449                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4450                 for (int i = 0; i < 10; i++)
4451                         E1000_WRITE_REG_ARRAY(hw,
4452                             E1000_RSSRK(0), i, random[i]);
4453
4454                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4455                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4456                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4457                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4458                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4459                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4460                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4461                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4462
4463                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4464
4465                 /*
4466                 ** NOTE: Receive Full-Packet Checksum Offload
4467                 ** is mutually exclusive with Multiqueue. However,
4468                 ** this is not the same as TCP/IP checksum offload,
4469                 ** which still works.
4470                 */
4471                 rxcsum |= E1000_RXCSUM_PCSD;
4472 #if __FreeBSD_version >= 800000
4473                 /* For SCTP Offload */
4474                 if ((hw->mac.type == e1000_82576)
4475                     && (ifp->if_capenable & IFCAP_RXCSUM))
4476                         rxcsum |= E1000_RXCSUM_CRCOFL;
4477 #endif
4478         } else {
4479                 /* Non RSS setup */
4480                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4481                         rxcsum |= E1000_RXCSUM_IPPCSE;
4482 #if __FreeBSD_version >= 800000
4483                         if (adapter->hw.mac.type == e1000_82576)
4484                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4485 #endif
4486                 } else
4487                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4488         }
4489         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4490
4491         /* Setup the Receive Control Register */
4492         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4493         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4494                    E1000_RCTL_RDMTS_HALF |
4495                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4496         /* Strip CRC bytes. */
4497         rctl |= E1000_RCTL_SECRC;
4498         /* Make sure VLAN Filters are off */
4499         rctl &= ~E1000_RCTL_VFE;
4500         /* Don't store bad packets */
4501         rctl &= ~E1000_RCTL_SBP;
4502
4503         /* Enable Receives */
4504         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4505
4506         /*
4507          * Setup the HW Rx Head and Tail Descriptor Pointers
4508          *   - needs to be after enable
4509          */
4510         for (int i = 0; i < adapter->num_queues; i++) {
4511                 rxr = &adapter->rx_rings[i];
4512                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4513 #ifdef DEV_NETMAP
4514                 /*
4515                  * An init() while a netmap client is active must
4516                  * preserve the rx buffers passed to userspace.
4517                  * In this driver it means we adjust RDT to
4518                  * something different from next_to_refresh
4519                  * (which is not used in netmap mode).
4520                  */
4521                 if (ifp->if_capenable & IFCAP_NETMAP) {
4522                         struct netmap_adapter *na = NA(adapter->ifp);
4523                         struct netmap_kring *kring = &na->rx_rings[i];
4524                         int t = rxr->next_to_refresh - kring->nr_hwavail;
4525
4526                         if (t >= adapter->num_rx_desc)
4527                                 t -= adapter->num_rx_desc;
4528                         else if (t < 0)
4529                                 t += adapter->num_rx_desc;
4530                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4531                 } else
4532 #endif /* DEV_NETMAP */
4533                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4534         }
4535         return;
4536 }
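/*
 * Worked example for the RSS setup above, assuming two queues on a
 * non-82575 part (shift == 0): RETA entry i receives (i % 2), and four
 * one-byte entries pack into each 32-bit register, so on little-endian
 * hosts every E1000_RETA write is 0x01000100 (queues 0,1,0,1).  On the
 * 82575 the queue index sits in the upper bits of each entry, hence
 * the shift of 6.
 */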
4537
4538 /*********************************************************************
4539  *
4540  *  Free receive rings.
4541  *
4542  **********************************************************************/
4543 static void
4544 igb_free_receive_structures(struct adapter *adapter)
4545 {
4546         struct rx_ring *rxr = adapter->rx_rings;
4547
4548         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4549                 struct lro_ctrl *lro = &rxr->lro;
4550                 igb_free_receive_buffers(rxr);
4551                 tcp_lro_free(lro);
4552                 igb_dma_free(adapter, &rxr->rxdma);
4553         }
4554
4555         free(adapter->rx_rings, M_DEVBUF);
4556 }
4557
4558 /*********************************************************************
4559  *
4560  *  Free receive ring data structures.
4561  *
4562  **********************************************************************/
4563 static void
4564 igb_free_receive_buffers(struct rx_ring *rxr)
4565 {
4566         struct adapter          *adapter = rxr->adapter;
4567         struct igb_rx_buf       *rxbuf;
4568         int i;
4569
4570         INIT_DEBUGOUT("free_receive_structures: begin");
4571
4572         /* Cleanup any existing buffers */
4573         if (rxr->rx_buffers != NULL) {
4574                 for (i = 0; i < adapter->num_rx_desc; i++) {
4575                         rxbuf = &rxr->rx_buffers[i];
4576                         if (rxbuf->m_head != NULL) {
4577                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4578                                     BUS_DMASYNC_POSTREAD);
4579                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4580                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4581                                 m_freem(rxbuf->m_head);
4582                         }
4583                         if (rxbuf->m_pack != NULL) {
4584                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4585                                     BUS_DMASYNC_POSTREAD);
4586                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4587                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4588                                 m_freem(rxbuf->m_pack);
4589                         }
4590                         rxbuf->m_head = NULL;
4591                         rxbuf->m_pack = NULL;
4592                         if (rxbuf->hmap != NULL) {
4593                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4594                                 rxbuf->hmap = NULL;
4595                         }
4596                         if (rxbuf->pmap != NULL) {
4597                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4598                                 rxbuf->pmap = NULL;
4599                         }
4600                 }
4601                 if (rxr->rx_buffers != NULL) {
4602                         free(rxr->rx_buffers, M_DEVBUF);
4603                         rxr->rx_buffers = NULL;
4604                 }
4605         }
4606
4607         if (rxr->htag != NULL) {
4608                 bus_dma_tag_destroy(rxr->htag);
4609                 rxr->htag = NULL;
4610         }
4611         if (rxr->ptag != NULL) {
4612                 bus_dma_tag_destroy(rxr->ptag);
4613                 rxr->ptag = NULL;
4614         }
4615 }
4616
4617 static __inline void
4618 igb_rx_discard(struct rx_ring *rxr, int i)
4619 {
4620         struct igb_rx_buf       *rbuf;
4621
4622         rbuf = &rxr->rx_buffers[i];
4623
4624         /* Partially received? Free the chain */
4625         if (rxr->fmp != NULL) {
4626                 rxr->fmp->m_flags |= M_PKTHDR;
4627                 m_freem(rxr->fmp);
4628                 rxr->fmp = NULL;
4629                 rxr->lmp = NULL;
4630         }
4631
4632         /*
4633         ** With advanced descriptors the writeback
4634         ** clobbers the buffer addrs, so it's easier
4635         ** to just free the existing mbufs and take
4636         ** the normal refresh path to get new buffers
4637         ** and mappings.
4638         */
4639         if (rbuf->m_head) {
4640                 m_free(rbuf->m_head);
4641                 rbuf->m_head = NULL;
4642         }
4643
4644         if (rbuf->m_pack) {
4645                 m_free(rbuf->m_pack);
4646                 rbuf->m_pack = NULL;
4647         }
4648
4649         return;
4650 }
4651
4652 static __inline void
4653 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4654 {
4655
4656         /*
4657          * At the moment LRO is only used for IPv4/TCP packets whose TCP
4658          * checksum has been verified by hardware, and which carry no VLAN
4659          * tag in the ethernet header.
4660          */
4661         if (rxr->lro_enabled &&
4662             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4663             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4664             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4665             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4666             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4667             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4668                 /*
4669                 ** Send to the stack if:
4670                 **  - LRO not enabled, or
4671                 **  - no LRO resources, or
4672                 **  - lro enqueue fails
4673                 */
4674                 if (rxr->lro.lro_cnt != 0)
4675                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4676                                 return;
4677         }
4678         IGB_RX_UNLOCK(rxr);
4679         (*ifp->if_input)(ifp, m);
4680         IGB_RX_LOCK(rxr);
4681 }
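/*
 * Illustrative sketch (not compiled): the generic tcp_lro(4) pattern
 * this routine participates in -- try to enqueue each candidate mbuf,
 * fall back to if_input() when LRO refuses it, and flush aggregated
 * entries at the end of a cleanup pass (as igb_rxeof() does below).
 * The function name is hypothetical.
 */
#if 0
static void
example_lro_pass(struct ifnet *ifp, struct lro_ctrl *lro, struct mbuf *m)
{
        struct lro_entry *queued;

        /* Try to aggregate; hand to the stack if LRO refuses it */
        if (tcp_lro_rx(lro, m, 0) != 0)
                (*ifp->if_input)(ifp, m);

        /* At the end of a pass, flush whatever LRO accumulated */
        while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
                SLIST_REMOVE_HEAD(&lro->lro_active, next);
                tcp_lro_flush(lro, queued);
        }
}
#endif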
4682
4683 /*********************************************************************
4684  *
4685  *  This routine executes in interrupt context. It replenishes
4686  *  the mbufs in the descriptor ring and sends data which has been
4687  *  dma'ed into host memory to the upper layer.
4688  *
4689  *  We loop at most count times if count is > 0, or until done if
4690  *  count < 0.
4691  *
4692  *  Return TRUE if more to clean, FALSE otherwise
4693  *********************************************************************/
4694 static bool
4695 igb_rxeof(struct igb_queue *que, int count, int *done)
4696 {
4697         struct adapter          *adapter = que->adapter;
4698         struct rx_ring          *rxr = que->rxr;
4699         struct ifnet            *ifp = adapter->ifp;
4700         struct lro_ctrl         *lro = &rxr->lro;
4701         struct lro_entry        *queued;
4702         int                     i, processed = 0, rxdone = 0;
4703         u32                     ptype, staterr = 0;
4704         union e1000_adv_rx_desc *cur;
4705
4706         IGB_RX_LOCK(rxr);
4707         /* Sync the ring. */
4708         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4709             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4710
4711 #ifdef DEV_NETMAP
4712         if (ifp->if_capenable & IFCAP_NETMAP) {
4713                 struct netmap_adapter *na = NA(ifp);
4714
4715                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4716                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4717                 IGB_RX_UNLOCK(rxr);
4718                 IGB_CORE_LOCK(adapter);
4719                 selwakeuppri(&na->rx_si, PI_NET);
4720                 IGB_CORE_UNLOCK(adapter);
4721                 return (0);
4722         }
4723 #endif /* DEV_NETMAP */
4724
4725         /* Main clean loop */
4726         for (i = rxr->next_to_check; count != 0;) {
4727                 struct mbuf             *sendmp, *mh, *mp;
4728                 struct igb_rx_buf       *rxbuf;
4729                 u16                     hlen, plen, hdr, vtag;
4730                 bool                    eop = FALSE;
4731  
4732                 cur = &rxr->rx_base[i];
4733                 staterr = le32toh(cur->wb.upper.status_error);
4734                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4735                         break;
4736                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4737                         break;
4738                 count--;
4739                 sendmp = mh = mp = NULL;
4740                 cur->wb.upper.status_error = 0;
4741                 rxbuf = &rxr->rx_buffers[i];
4742                 plen = le16toh(cur->wb.upper.length);
4743                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4744                 if ((adapter->hw.mac.type == e1000_i350) &&
4745                     (staterr & E1000_RXDEXT_STATERR_LB))
4746                         vtag = be16toh(cur->wb.upper.vlan);
4747                 else
4748                         vtag = le16toh(cur->wb.upper.vlan);
4749                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4750                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4751
4752                 /* Make sure all segments of a bad packet are discarded */
4753                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4754                     (rxr->discard)) {
4755                         adapter->dropped_pkts++;
4756                         ++rxr->rx_discarded;
4757                         if (!eop) /* Catch subsequent segs */
4758                                 rxr->discard = TRUE;
4759                         else
4760                                 rxr->discard = FALSE;
4761                         igb_rx_discard(rxr, i);
4762                         goto next_desc;
4763                 }
4764
4765                 /*
4766                 ** The way the hardware is configured to
4767                 ** split, it will ONLY use the header buffer
4768                 ** when header split is enabled; otherwise we
4769                 ** get normal behavior, i.e., both header and
4770                 ** payload are DMA'd into the payload buffer.
4771                 **
4772                 ** The fmp test is to catch the case where a
4773                 ** packet spans multiple descriptors; in that
4774                 ** case only the first header is valid.
4775                 */
4776                 if (rxr->hdr_split && rxr->fmp == NULL) {
4777                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4778                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4779                         if (hlen > IGB_HDR_BUF)
4780                                 hlen = IGB_HDR_BUF;
4781                         mh = rxr->rx_buffers[i].m_head;
4782                         mh->m_len = hlen;
4783                         /* clear buf pointer for refresh */
4784                         rxbuf->m_head = NULL;
4785                         /*
4786                         ** Get the payload length; this
4787                         ** could be zero if it's a small
4788                         ** packet.
4789                         */
4790                         if (plen > 0) {
4791                                 mp = rxr->rx_buffers[i].m_pack;
4792                                 mp->m_len = plen;
4793                                 mh->m_next = mp;
4794                                 /* clear buf pointer */
4795                                 rxbuf->m_pack = NULL;
4796                                 rxr->rx_split_packets++;
4797                         }
4798                 } else {
4799                         /*
4800                         ** Either no header split, or a
4801                         ** secondary piece of a fragmented
4802                         ** split packet.
4803                         */
4804                         mh = rxr->rx_buffers[i].m_pack;
4805                         mh->m_len = plen;
4806                         /* clear buf info for refresh */
4807                         rxbuf->m_pack = NULL;
4808                 }
4809
4810                 ++processed; /* So we know when to refresh */
4811
4812                 /* Initial frame - setup */
4813                 if (rxr->fmp == NULL) {
4814                         mh->m_pkthdr.len = mh->m_len;
4815                         /* Save the head of the chain */
4816                         rxr->fmp = mh;
4817                         rxr->lmp = mh;
4818                         if (mp != NULL) {
4819                                 /* Add payload if split */
4820                                 mh->m_pkthdr.len += mp->m_len;
4821                                 rxr->lmp = mh->m_next;
4822                         }
4823                 } else {
4824                         /* Chain mbuf's together */
4825                         rxr->lmp->m_next = mh;
4826                         rxr->lmp = rxr->lmp->m_next;
4827                         rxr->fmp->m_pkthdr.len += mh->m_len;
4828                 }
4829
4830                 if (eop) {
4831                         rxr->fmp->m_pkthdr.rcvif = ifp;
4832                         ifp->if_ipackets++;
4833                         rxr->rx_packets++;
4834                         /* capture data for AIM */
4835                         rxr->packets++;
4836                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4837                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4838
4839                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4840                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4841
4842                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4843                             (staterr & E1000_RXD_STAT_VP) != 0) {
4844                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4845                                 rxr->fmp->m_flags |= M_VLANTAG;
4846                         }
4847 #if __FreeBSD_version >= 800000
4848                         rxr->fmp->m_pkthdr.flowid = que->msix;
4849                         rxr->fmp->m_flags |= M_FLOWID;
4850 #endif
4851                         sendmp = rxr->fmp;
4852                         /* Make sure to set M_PKTHDR. */
4853                         sendmp->m_flags |= M_PKTHDR;
4854                         rxr->fmp = NULL;
4855                         rxr->lmp = NULL;
4856                 }
4857
4858 next_desc:
4859                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4860                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4861
4862                 /* Advance our pointers to the next descriptor. */
4863                 if (++i == adapter->num_rx_desc)
4864                         i = 0;
4865                 /*
4866                 ** Send to the stack or LRO
4867                 */
4868                 if (sendmp != NULL) {
4869                         rxr->next_to_check = i;
4870                         igb_rx_input(rxr, ifp, sendmp, ptype);
4871                         i = rxr->next_to_check;
4872                         rxdone++;
4873                 }
4874
4875                 /* Every 8 descriptors we go to refresh mbufs */
4876                 if (processed == 8) {
4877                         igb_refresh_mbufs(rxr, i);
4878                         processed = 0;
4879                 }
4880         }
4881
4882         /* Catch any remainders */
4883         if (igb_rx_unrefreshed(rxr))
4884                 igb_refresh_mbufs(rxr, i);
4885
4886         rxr->next_to_check = i;
4887
4888         /*
4889          * Flush any outstanding LRO work
4890          */
4891         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4892                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4893                 tcp_lro_flush(lro, queued);
4894         }
4895
4896         if (done != NULL)
4897                 *done = rxdone;
4898
4899         IGB_RX_UNLOCK(rxr);
4900         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4901 }
4902
4903 /*********************************************************************
4904  *
4905  *  Verify that the hardware indicated that the checksum is valid.
4906  *  Inform the stack about the status of the checksum so that the
4907  *  stack doesn't spend time verifying it.
4908  *
4909  *********************************************************************/
4910 static void
4911 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4912 {
4913         u16 status = (u16)staterr;
4914         u8  errors = (u8) (staterr >> 24);
4915         int sctp;
4916
4917         /* Ignore Checksum bit is set */
4918         if (status & E1000_RXD_STAT_IXSM) {
4919                 mp->m_pkthdr.csum_flags = 0;
4920                 return;
4921         }
4922
4923         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4924             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4925                 sctp = 1;
4926         else
4927                 sctp = 0;
4928         if (status & E1000_RXD_STAT_IPCS) {
4929                 /* Did it pass? */
4930                 if (!(errors & E1000_RXD_ERR_IPE)) {
4931                         /* IP Checksum Good */
4932                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4933                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4934                 } else
4935                         mp->m_pkthdr.csum_flags = 0;
4936         }
4937
4938         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4939                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4940 #if __FreeBSD_version >= 800000
4941                 if (sctp) /* reassign */
4942                         type = CSUM_SCTP_VALID;
4943 #endif
4944                 /* Did it pass? */
4945                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4946                         mp->m_pkthdr.csum_flags |= type;
4947                         if (sctp == 0)
4948                                 mp->m_pkthdr.csum_data = htons(0xffff);
4949                 }
4950         }
4951         return;
4952 }
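/*
 * Worked example for the decomposition above: the descriptor's 32-bit
 * status_error word keeps status in the low 16 bits and the error bits
 * in the top byte.  For instance staterr == 0x00000063 yields
 * status == 0x0063 (DD | EOP | TCPCS | IPCS) and errors == 0x00, so
 * both the IP and TCP checksums are reported good to the stack.
 */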
4953
4954 /*
4955  * This routine is run via a vlan
4956  * config EVENT
4957  */
4958 static void
4959 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4960 {
4961         struct adapter  *adapter = ifp->if_softc;
4962         u32             index, bit;
4963
4964         if (ifp->if_softc != arg)   /* Not our event */
4965                 return;
4966
4967         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4968                 return;
4969
4970         IGB_CORE_LOCK(adapter);
4971         index = (vtag >> 5) & 0x7F;
4972         bit = vtag & 0x1F;
4973         adapter->shadow_vfta[index] |= (1 << bit);
4974         ++adapter->num_vlans;
4975         /* Change hw filter setting */
4976         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4977                 igb_setup_vlan_hw_support(adapter);
4978         IGB_CORE_UNLOCK(adapter);
4979 }
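/*
 * Worked example of the VFTA addressing above: the 4096 possible VLAN
 * IDs map onto 128 32-bit registers.  For vtag == 100, index ==
 * (100 >> 5) & 0x7F == 3 and bit == 100 & 0x1F == 4, so bit 4 of
 * shadow_vfta[3] is set and VLAN 100 passes the hardware filter once
 * the table is written back in igb_setup_vlan_hw_support().
 */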
4980
4981 /*
4982  * This routine is run via a vlan
4983  * unconfig EVENT
4984  */
4985 static void
4986 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4987 {
4988         struct adapter  *adapter = ifp->if_softc;
4989         u32             index, bit;
4990
4991         if (ifp->if_softc != arg)   /* Not our event */
4992                 return;
4993
4994         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4995                 return;
4996
4997         IGB_CORE_LOCK(adapter);
4998         index = (vtag >> 5) & 0x7F;
4999         bit = vtag & 0x1F;
5000         adapter->shadow_vfta[index] &= ~(1 << bit);
5001         --adapter->num_vlans;
5002         /* Change hw filter setting */
5003         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5004                 igb_setup_vlan_hw_support(adapter);
5005         IGB_CORE_UNLOCK(adapter);
5006 }
5007
5008 static void
5009 igb_setup_vlan_hw_support(struct adapter *adapter)
5010 {
5011         struct e1000_hw *hw = &adapter->hw;
5012         struct ifnet    *ifp = adapter->ifp;
5013         u32             reg;
5014
5015         if (adapter->vf_ifp) {
5016                 e1000_rlpml_set_vf(hw,
5017                     adapter->max_frame_size + VLAN_TAG_SIZE);
5018                 return;
5019         }
5020
5021         reg = E1000_READ_REG(hw, E1000_CTRL);
5022         reg |= E1000_CTRL_VME;
5023         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5024
5025         /* Enable the Filter Table */
5026         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5027                 reg = E1000_READ_REG(hw, E1000_RCTL);
5028                 reg &= ~E1000_RCTL_CFIEN;
5029                 reg |= E1000_RCTL_VFE;
5030                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5031         }
5032
5033         /* Update the frame size */
5034         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5035             adapter->max_frame_size + VLAN_TAG_SIZE);
5036
5037         /* Don't bother with table if no vlans */
5038         if ((adapter->num_vlans == 0) ||
5039             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5040                 return;
5041         /*
5042         ** A soft reset zeroes out the VFTA, so
5043         ** we need to repopulate it now.
5044         */
5045         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5046                 if (adapter->shadow_vfta[i] != 0) {
5047                         if (adapter->vf_ifp)
5048                                 e1000_vfta_set_vf(hw,
5049                                     adapter->shadow_vfta[i], TRUE);
5050                         else
5051                                 e1000_write_vfta(hw,
5052                                     i, adapter->shadow_vfta[i]);
5053                 }
5054 }
5055
5056 static void
5057 igb_enable_intr(struct adapter *adapter)
5058 {
5059         /* With RSS, set up what to auto-clear */
5060         if (adapter->msix_mem) {
5061                 u32 mask = (adapter->que_mask | adapter->link_mask);
5062                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5063                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5064                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5065                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5066                     E1000_IMS_LSC);
5067         } else {
5068                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5069                     IMS_ENABLE_MASK);
5070         }
5071         E1000_WRITE_FLUSH(&adapter->hw);
5072
5073         return;
5074 }
5075
5076 static void
5077 igb_disable_intr(struct adapter *adapter)
5078 {
5079         if (adapter->msix_mem) {
5080                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5081                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5082         } 
5083         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5084         E1000_WRITE_FLUSH(&adapter->hw);
5085         return;
5086 }
5087
5088 /*
5089  * Bit of a misnomer: what this really means is
5090  * to enable OS management of the system, i.e.
5091  * to disable special hardware management features.
5092  */
5093 static void
5094 igb_init_manageability(struct adapter *adapter)
5095 {
5096         if (adapter->has_manage) {
5097                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5098                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5099
5100                 /* disable hardware interception of ARP */
5101                 manc &= ~(E1000_MANC_ARP_EN);
5102
5103                 /* enable receiving management packets to the host */
5104                 manc |= E1000_MANC_EN_MNG2HOST;
5105                 manc2h |= 1 << 5;  /* Mng Port 623 */
5106                 manc2h |= 1 << 6;  /* Mng Port 664 */
5107                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5108                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5109         }
5110 }
5111
5112 /*
5113  * Give control back to hardware management
5114  * controller if there is one.
5115  */
5116 static void
5117 igb_release_manageability(struct adapter *adapter)
5118 {
5119         if (adapter->has_manage) {
5120                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5121
5122                 /* re-enable hardware interception of ARP */
5123                 manc |= E1000_MANC_ARP_EN;
5124                 manc &= ~E1000_MANC_EN_MNG2HOST;
5125
5126                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5127         }
5128 }
5129
5130 /*
5131  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5132  * For ASF and Pass Through versions of f/w this means that
5133  * the driver is loaded. 
5134  *
5135  */
5136 static void
5137 igb_get_hw_control(struct adapter *adapter)
5138 {
5139         u32 ctrl_ext;
5140
5141         if (adapter->vf_ifp)
5142                 return;
5143
5144         /* Let firmware know the driver has taken over */
5145         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5146         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5147             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5148 }
5149
5150 /*
5151  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5152  * For ASF and Pass Through versions of f/w this means that the
5153  * driver is no longer loaded.
5154  *
5155  */
5156 static void
5157 igb_release_hw_control(struct adapter *adapter)
5158 {
5159         u32 ctrl_ext;
5160
5161         if (adapter->vf_ifp)
5162                 return;
5163
5164         /* Let firmware take over control of h/w */
5165         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5166         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5167             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5168 }
5169
5170 static int
5171 igb_is_valid_ether_addr(uint8_t *addr)
5172 {
5173         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5174
5175         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5176                 return (FALSE);
5177         }
5178
5179         return (TRUE);
5180 }
5181
5182
5183 /*
5184  * Enable PCI Wake On Lan capability
5185  */
5186 static void
5187 igb_enable_wakeup(device_t dev)
5188 {
5189         u16     cap, status;
5190         u8      id;
5191
5192         /* First find the capabilities pointer */
5193         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5194         /* Read the PM Capabilities */
5195         id = pci_read_config(dev, cap, 1);
5196         if (id != PCIY_PMG)     /* Something wrong */
5197                 return;
5198         /* OK, we have the power capabilities, so
5199            now get the status register */
5200         cap += PCIR_POWER_STATUS;
5201         status = pci_read_config(dev, cap, 2);
5202         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5203         pci_write_config(dev, cap, status, 2);
5204         return;
5205 }
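/*
 * Illustrative alternative (not compiled): the routine above assumes
 * the power-management capability sits first in the PCI capability
 * list.  A walk via pci_find_cap(9) avoids that assumption:
 */
#if 0
        int pmc;
        u16 status;

        if (pci_find_cap(dev, PCIY_PMG, &pmc) == 0) {
                status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
                status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
                pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
        }
#endif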
5206
5207 static void
5208 igb_led_func(void *arg, int onoff)
5209 {
5210         struct adapter  *adapter = arg;
5211
5212         IGB_CORE_LOCK(adapter);
5213         if (onoff) {
5214                 e1000_setup_led(&adapter->hw);
5215                 e1000_led_on(&adapter->hw);
5216         } else {
5217                 e1000_led_off(&adapter->hw);
5218                 e1000_cleanup_led(&adapter->hw);
5219         }
5220         IGB_CORE_UNLOCK(adapter);
5221 }
5222
5223 /**********************************************************************
5224  *
5225  *  Update the board statistics counters.
5226  *
5227  **********************************************************************/
5228 static void
5229 igb_update_stats_counters(struct adapter *adapter)
5230 {
5231         struct ifnet            *ifp;
5232         struct e1000_hw         *hw = &adapter->hw;
5233         struct e1000_hw_stats   *stats;
5234
5235         /*
5236         ** The virtual function adapter has only a
5237         ** small controlled set of stats, so update
5238         ** only those and return.
5239         */
5240         if (adapter->vf_ifp) {
5241                 igb_update_vf_stats_counters(adapter);
5242                 return;
5243         }
5244
5245         stats = (struct e1000_hw_stats  *)adapter->stats;
5246
5247         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5248            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5249                 stats->symerrs +=
5250                     E1000_READ_REG(hw, E1000_SYMERRS);
5251                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5252         }
5253
5254         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5255         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5256         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5257         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5258
5259         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5260         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5261         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5262         stats->dc += E1000_READ_REG(hw, E1000_DC);
5263         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5264         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5265         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5266         /*
5267         ** For watchdog management we need to know if we have been
5268         ** paused during the last interval, so capture that here.
5269         */ 
5270         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5271         stats->xoffrxc += adapter->pause_frames;
5272         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5273         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5274         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5275         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5276         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5277         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5278         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5279         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5280         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5281         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5282         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5283         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5284
5285         /* For the 64-bit byte counters the low dword must be read first. */
5286         /* Both registers clear on the read of the high dword */
5287
5288         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5289             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5290         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5291             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5292
5293         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5294         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5295         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5296         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5297         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5298
5299         stats->tor += E1000_READ_REG(hw, E1000_TORH);
5300         stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5301
5302         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5303         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5304         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5305         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5306         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5307         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5308         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5309         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5310         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5311         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5312
5313         /* Interrupt Counts */
5314
5315         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5316         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5317         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5318         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5319         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5320         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5321         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5322         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5323         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5324
5325         /* Host to Card Statistics */
5326
5327         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5328         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5329         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5330         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5331         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5332         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5333         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5334         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5335             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5336         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5337             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5338         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5339         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5340         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5341
5342         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5343         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5344         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5345         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5346         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5347         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5348
5349         ifp = adapter->ifp;
5350         ifp->if_collisions = stats->colc;
5351
5352         /* Rx Errors */
5353         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5354             stats->crcerrs + stats->algnerrc +
5355             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5356
5357         /* Tx Errors */
5358         ifp->if_oerrors = stats->ecol +
5359             stats->latecol + adapter->watchdog_events;
5360
5361         /* Driver specific counters */
5362         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5363         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5364         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5365         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5366         adapter->packet_buf_alloc_tx =
5367             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5368         adapter->packet_buf_alloc_rx =
5369             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5370 }
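/*
 * Sketch (not compiled) of the 64-bit counter pattern used above: per
 * the hardware rule noted earlier, the low dword must be read first,
 * and the read of the high dword clears the register pair.
 */
#if 0
        u64 good_octets;

        good_octets  = E1000_READ_REG(hw, E1000_GORCL);
        good_octets += (u64)E1000_READ_REG(hw, E1000_GORCH) << 32;
        stats->gorc += good_octets;
#endif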
5371
5372
5373 /**********************************************************************
5374  *
5375  *  Initialize the VF board statistics counters.
5376  *
5377  **********************************************************************/
5378 static void
5379 igb_vf_init_stats(struct adapter *adapter)
5380 {
5381         struct e1000_hw *hw = &adapter->hw;
5382         struct e1000_vf_stats   *stats;
5383
5384         stats = (struct e1000_vf_stats  *)adapter->stats;
5385         if (stats == NULL)
5386                 return;
5387         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5388         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5389         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5390         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5391         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5392 }
5393  
5394 /**********************************************************************
5395  *
5396  *  Update the VF board statistics counters.
5397  *
5398  **********************************************************************/
5399 static void
5400 igb_update_vf_stats_counters(struct adapter *adapter)
5401 {
5402         struct e1000_hw *hw = &adapter->hw;
5403         struct e1000_vf_stats   *stats;
5404
5405         if (adapter->link_speed == 0)
5406                 return;
5407
5408         stats = (struct e1000_vf_stats  *)adapter->stats;
5409
5410         UPDATE_VF_REG(E1000_VFGPRC,
5411             stats->last_gprc, stats->gprc);
5412         UPDATE_VF_REG(E1000_VFGORC,
5413             stats->last_gorc, stats->gorc);
5414         UPDATE_VF_REG(E1000_VFGPTC,
5415             stats->last_gptc, stats->gptc);
5416         UPDATE_VF_REG(E1000_VFGOTC,
5417             stats->last_gotc, stats->gotc);
5418         UPDATE_VF_REG(E1000_VFMPRC,
5419             stats->last_mprc, stats->mprc);
5420 }
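/*
 * UPDATE_VF_REG (defined in if_igb.h) folds a free-running 32-bit VF
 * counter into a 64-bit software total, detecting wraparound when the
 * hardware value goes backwards.  A minimal sketch of that idea (the
 * macro name and body here are illustrative, not the real definition):
 */
#if 0
#define EXAMPLE_UPDATE_VF_REG(reg, last, cur)                   \
do {                                                            \
        u32 cur32 = E1000_READ_REG(hw, reg);                    \
        if (cur32 < (last))          /* 32-bit counter wrapped */ \
                (cur) += (u64)1 << 32;                          \
        (cur) = ((cur) & 0xFFFFFFFF00000000ULL) | cur32;        \
        (last) = cur32;                                         \
} while (0)
#endif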
5421
5422 /* Export a single 32-bit register via a read-only sysctl. */
5423 static int
5424 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5425 {
5426         struct adapter *adapter;
5427         u_int val;
5428
5429         adapter = oidp->oid_arg1;
5430         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5431         return (sysctl_handle_int(oidp, &val, 0, req));
5432 }
5433
5434 /*
5435 **  Tunable interrupt rate handler
5436 */
5437 static int
5438 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5439 {
5440         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5441         int                     error;
5442         u32                     reg, usec, rate;
5443                         
5444         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5445         usec = ((reg & 0x7FFC) >> 2);
5446         if (usec > 0)
5447                 rate = 1000000 / usec;
5448         else
5449                 rate = 0;
5450         error = sysctl_handle_int(oidp, &rate, 0, req);
5451         if (error || !req->newptr)
5452                 return (error);
5453         return (0);
5454 }
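/*
 * Worked example for the conversion above: the driver treats EITR
 * bits 14:2 as an interval in microseconds, so reg == 0x01F4 gives
 * usec == (0x01F4 & 0x7FFC) >> 2 == 125 and a reported rate of
 * 1000000 / 125 == 8000 interrupts per second.
 */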
5455
5456 /*
5457  * Add sysctl variables, one per statistic, to the system.
5458  */
5459 static void
5460 igb_add_hw_stats(struct adapter *adapter)
5461 {
5462         device_t dev = adapter->dev;
5463
5464         struct tx_ring *txr = adapter->tx_rings;
5465         struct rx_ring *rxr = adapter->rx_rings;
5466
5467         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5468         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5469         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5470         struct e1000_hw_stats *stats = adapter->stats;
5471
5472         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5473         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5474
5475 #define QUEUE_NAME_LEN 32
5476         char namebuf[QUEUE_NAME_LEN];
5477
5478         /* Driver Statistics */
5479         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5480                         CTLFLAG_RD, &adapter->link_irq, 0,
5481                         "Link MSIX IRQ Handled");
5482         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5483                         CTLFLAG_RD, &adapter->dropped_pkts,
5484                         "Driver dropped packets");
5485         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5486                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5487                         "Driver tx dma failure in xmit");
5488         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5489                         CTLFLAG_RD, &adapter->rx_overruns,
5490                         "RX overruns");
5491         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5492                         CTLFLAG_RD, &adapter->watchdog_events,
5493                         "Watchdog timeouts");
5494
5495         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5496                         CTLFLAG_RD, &adapter->device_control,
5497                         "Device Control Register");
5498         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5499                         CTLFLAG_RD, &adapter->rx_control,
5500                         "Receiver Control Register");
5501         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5502                         CTLFLAG_RD, &adapter->int_mask,
5503                         "Interrupt Mask");
5504         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5505                         CTLFLAG_RD, &adapter->eint_mask,
5506                         "Extended Interrupt Mask");
5507         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5508                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5509                         "Transmit Buffer Packet Allocation");
5510         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5511                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5512                         "Receive Buffer Packet Allocation");
5513         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5514                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5515                         "Flow Control High Watermark");
5516         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5517                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5518                         "Flow Control Low Watermark");
5519
5520         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5521                 struct lro_ctrl *lro = &rxr->lro;
5522
5523                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5524                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5525                                             CTLFLAG_RD, NULL, "Queue Name");
5526                 queue_list = SYSCTL_CHILDREN(queue_node);
5527
5528                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5529                                 CTLFLAG_RD, &adapter->queues[i],
5530                                 sizeof(&adapter->queues[i]),
5531                                 igb_sysctl_interrupt_rate_handler,
5532                                 "IU", "Interrupt Rate");
5533
5534                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5535                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5536                                 igb_sysctl_reg_handler, "IU",
5537                                 "Transmit Descriptor Head");
5538                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5539                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5540                                 igb_sysctl_reg_handler, "IU",
5541                                 "Transmit Descriptor Tail");
5542                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5543                                 CTLFLAG_RD, &txr->no_desc_avail,
5544                                 "Queue No Descriptor Available");
5545                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5546                                 CTLFLAG_RD, &txr->tx_packets,
5547                                 "Queue Packets Transmitted");
5548
5549                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5550                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5551                                 igb_sysctl_reg_handler, "IU",
5552                                 "Receive Descriptor Head");
5553                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5554                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5555                                 igb_sysctl_reg_handler, "IU",
5556                                 "Receive Descriptor Tail");
5557                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5558                                 CTLFLAG_RD, &rxr->rx_packets,
5559                                 "Queue Packets Received");
5560                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5561                                 CTLFLAG_RD, &rxr->rx_bytes,
5562                                 "Queue Bytes Received");
5563                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5564                                 CTLFLAG_RD, &lro->lro_queued, 0,
5565                                 "LRO Queued");
5566                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5567                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5568                                 "LRO Flushed");
5569         }
5570
5571         /* MAC stats get their own sub node */
5572
5573         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5574                                     CTLFLAG_RD, NULL, "MAC Statistics");
5575         stat_list = SYSCTL_CHILDREN(stat_node);
5576
5577         /*
5578         ** The VF adapter has a very limited set of stats,
5579         ** since it's not managing the metal, so to speak.
5580         */
5581         if (adapter->vf_ifp) {
5582                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5583                                 CTLFLAG_RD, &stats->gprc,
5584                                 "Good Packets Received");
5585                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5586                                 CTLFLAG_RD, &stats->gptc,
5587                                 "Good Packets Transmitted");
5588                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5589                                 CTLFLAG_RD, &stats->gorc,
5590                                 "Good Octets Received");
5591                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5592                                 CTLFLAG_RD, &stats->gotc,
5593                                 "Good Octets Transmitted");
5594                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5595                                 CTLFLAG_RD, &stats->mprc,
5596                                 "Multicast Packets Received");
5597                 return;
5598         }
5599
5600         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5601                         CTLFLAG_RD, &stats->ecol,
5602                         "Excessive collisions");
5603         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5604                         CTLFLAG_RD, &stats->scc,
5605                         "Single collisions");
5606         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5607                         CTLFLAG_RD, &stats->mcc,
5608                         "Multiple collisions");
5609         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5610                         CTLFLAG_RD, &stats->latecol,
5611                         "Late collisions");
5612         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5613                         CTLFLAG_RD, &stats->colc,
5614                         "Collision Count");
5615         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5616                         CTLFLAG_RD, &stats->symerrs,
5617                         "Symbol Errors");
5618         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5619                         CTLFLAG_RD, &stats->sec,
5620                         "Sequence Errors");
5621         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5622                         CTLFLAG_RD, &stats->dc,
5623                         "Defer Count");
5624         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5625                         CTLFLAG_RD, &stats->mpc,
5626                         "Missed Packets");
5627         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5628                         CTLFLAG_RD, &stats->rnbc,
5629                         "Receive No Buffers");
5630         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5631                         CTLFLAG_RD, &stats->ruc,
5632                         "Receive Undersize");
5633         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5634                         CTLFLAG_RD, &stats->rfc,
5635                         "Fragmented Packets Received");
5636         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5637                         CTLFLAG_RD, &stats->roc,
5638                         "Oversized Packets Received");
5639         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5640                         CTLFLAG_RD, &stats->rjc,
5641                         "Received Jabber");
5642         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5643                         CTLFLAG_RD, &stats->rxerrc,
5644                         "Receive Errors");
5645         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5646                         CTLFLAG_RD, &stats->crcerrs,
5647                         "CRC errors");
5648         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5649                         CTLFLAG_RD, &stats->algnerrc,
5650                         "Alignment Errors");
5651         /* On 82575 these are collision counts */
5652         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5653                         CTLFLAG_RD, &stats->cexterr,
5654                         "Collision/Carrier extension errors");
5655         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5656                         CTLFLAG_RD, &stats->xonrxc,
5657                         "XON Received");
5658         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5659                         CTLFLAG_RD, &stats->xontxc,
5660                         "XON Transmitted");
5661         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5662                         CTLFLAG_RD, &stats->xoffrxc,
5663                         "XOFF Received");
5664         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5665                         CTLFLAG_RD, &stats->xofftxc,
5666                         "XOFF Transmitted");
5667         /* Packet Reception Stats */
5668         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5669                         CTLFLAG_RD, &stats->tpr,
5670                         "Total Packets Received");
5671         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5672                         CTLFLAG_RD, &stats->gprc,
5673                         "Good Packets Received");
5674         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5675                         CTLFLAG_RD, &stats->bprc,
5676                         "Broadcast Packets Received");
5677         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5678                         CTLFLAG_RD, &stats->mprc,
5679                         "Multicast Packets Received");
5680         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5681                         CTLFLAG_RD, &stats->prc64,
5682                         "64 byte frames received");
5683         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5684                         CTLFLAG_RD, &stats->prc127,
5685                         "65-127 byte frames received");
5686         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5687                         CTLFLAG_RD, &stats->prc255,
5688                         "128-255 byte frames received");
5689         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5690                         CTLFLAG_RD, &stats->prc511,
5691                         "256-511 byte frames received");
5692         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5693                         CTLFLAG_RD, &stats->prc1023,
5694                         "512-1023 byte frames received");
5695         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5696                         CTLFLAG_RD, &stats->prc1522,
5697                         "1024-1522 byte frames received");
5698         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5699                         CTLFLAG_RD, &stats->gorc, 
5700                         "Good Octets Received"); 
5701
5702         /* Packet Transmission Stats */
5703         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5704                         CTLFLAG_RD, &stats->gotc, 
5705                         "Good Octets Transmitted"); 
5706         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5707                         CTLFLAG_RD, &stats->tpt,
5708                         "Total Packets Transmitted");
5709         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5710                         CTLFLAG_RD, &stats->gptc,
5711                         "Good Packets Transmitted");
5712         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5713                         CTLFLAG_RD, &stats->bptc,
5714                         "Broadcast Packets Transmitted");
5715         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5716                         CTLFLAG_RD, &stats->mptc,
5717                         "Multicast Packets Transmitted");
5718         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5719                         CTLFLAG_RD, &stats->ptc64,
5720                         "64 byte frames transmitted");
5721         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5722                         CTLFLAG_RD, &stats->ptc127,
5723                         "65-127 byte frames transmitted");
5724         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5725                         CTLFLAG_RD, &stats->ptc255,
5726                         "128-255 byte frames transmitted");
5727         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5728                         CTLFLAG_RD, &stats->ptc511,
5729                         "256-511 byte frames transmitted");
5730         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5731                         CTLFLAG_RD, &stats->ptc1023,
5732                         "512-1023 byte frames transmitted");
5733         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5734                         CTLFLAG_RD, &stats->ptc1522,
5735                         "1024-1522 byte frames transmitted");
5736         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5737                         CTLFLAG_RD, &stats->tsctc,
5738                         "TSO Contexts Transmitted");
5739         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5740                         CTLFLAG_RD, &stats->tsctfc,
5741                         "TSO Contexts Failed");
5742
5743
5744         /* Interrupt Stats */
5745
5746         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5747                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5748         int_list = SYSCTL_CHILDREN(int_node);
5749
5750         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5751                         CTLFLAG_RD, &stats->iac,
5752                         "Interrupt Assertion Count");
5753
5754         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5755                         CTLFLAG_RD, &stats->icrxptc,
5756                         "Interrupt Cause Rx Pkt Timer Expire Count");
5757
5758         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5759                         CTLFLAG_RD, &stats->icrxatc,
5760                         "Interrupt Cause Rx Abs Timer Expire Count");
5761
5762         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5763                         CTLFLAG_RD, &stats->ictxptc,
5764                         "Interrupt Cause Tx Pkt Timer Expire Count");
5765
5766         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5767                         CTLFLAG_RD, &stats->ictxatc,
5768                         "Interrupt Cause Tx Abs Timer Expire Count");
5769
5770         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5771                         CTLFLAG_RD, &stats->ictxqec,
5772                         "Interrupt Cause Tx Queue Empty Count");
5773
5774         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5775                         CTLFLAG_RD, &stats->ictxqmtc,
5776                         "Interrupt Cause Tx Queue Min Thresh Count");
5777
5778         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5779                         CTLFLAG_RD, &stats->icrxdmtc,
5780                         "Interrupt Cause Rx Desc Min Thresh Count");
5781
5782         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5783                         CTLFLAG_RD, &stats->icrxoc,
5784                         "Interrupt Cause Receiver Overrun Count");
5785
5786         /* Host to Card Stats */
5787
5788         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5789                                     CTLFLAG_RD, NULL, 
5790                                     "Host to Card Statistics");
5791
5792         host_list = SYSCTL_CHILDREN(host_node);
5793
5794         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5795                         CTLFLAG_RD, &stats->cbtmpc,
5796                         "Circuit Breaker Tx Packet Count");
5797
5798         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5799                         CTLFLAG_RD, &stats->htdpmc,
5800                         "Host Transmit Discarded Packets");
5801
5802         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5803                         CTLFLAG_RD, &stats->rpthc,
5804                         "Rx Packets To Host");
5805
5806         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5807                         CTLFLAG_RD, &stats->cbrmpc,
5808                         "Circuit Breaker Rx Packet Count");
5809
5810         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5811                         CTLFLAG_RD, &stats->cbrdpc,
5812                         "Circuit Breaker Rx Dropped Count");
5813
5814         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5815                         CTLFLAG_RD, &stats->hgptc,
5816                         "Host Good Packets Tx Count");
5817
5818         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5819                         CTLFLAG_RD, &stats->htcbdpc,
5820                         "Host Tx Circuit Breaker Dropped Count");
5821
5822         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5823                         CTLFLAG_RD, &stats->hgorc,
5824                         "Host Good Octets Received Count");
5825
5826         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5827                         CTLFLAG_RD, &stats->hgotc,
5828                         "Host Good Octets Transmit Count");
5829
5830         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5831                         CTLFLAG_RD, &stats->lenerrs,
5832                         "Length Errors");
5833
5834         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5835                         CTLFLAG_RD, &stats->scvpc,
5836                         "SerDes/SGMII Code Violation Pkt Count");
5837
5838         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5839                         CTLFLAG_RD, &stats->hrmpc,
5840                         "Header Redirection Missed Packet Count");
5841 }
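
/*
 * The nodes registered above hang off the device's sysctl tree, e.g.
 * for unit 0 (names as created above):
 *
 *	# sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *	# sysctl dev.igb.0.queue0.rx_packets
 *	# sysctl dev.igb.0.interrupts.asserts
 *	# sysctl dev.igb.0.host.rx_pkt
 */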
5842
5843
5844 /**********************************************************************
5845  *
5846  *  This routine provides a way to dump out the adapter EEPROM,
5847  *  often a useful debug/service tool.  It dumps only the first
5848  *  32 words, since the data that matters lives in that extent.
5849  *
5850  **********************************************************************/
5851 static int
5852 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5853 {
5854         struct adapter *adapter;
5855         int error;
5856         int result;
5857
5858         result = -1;
5859         error = sysctl_handle_int(oidp, &result, 0, req);
5860
5861         if (error || !req->newptr)
5862                 return (error);
5863
5864         /*
5865          * This value will cause a hex dump of the
5866          * first 32 16-bit words of the EEPROM to
5867          * the screen.
5868          */
5869         if (result == 1) {
5870                 adapter = (struct adapter *)arg1;
5871                 igb_print_nvm_info(adapter);
5872         }
5873
5874         return (error);
5875 }
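
/*
 * Writing 1 to the node triggers the dump; any other value is
 * ignored.  Assuming the node is registered as "nvm" at attach time
 * (the registration is outside this extent):
 *
 *	# sysctl dev.igb.0.nvm=1
 */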
5876
5877 static void
5878 igb_print_nvm_info(struct adapter *adapter)
5879 {
5880         u16     eeprom_data;
5881         int     i, j, row = 0;
5882
5883         /* It's a bit crude, but it gets the job done */
5884         printf("\nInterface EEPROM Dump:\n");
5885         printf("Offset\n0x0000  ");
5886         for (i = 0, j = 0; i < 32; i++, j++) {
5887                 if (j == 8) { /* Make the offset block */
5888                         j = 0; ++row;
5889                         printf("\n0x00%x0  ", row);
5890                 }
5891                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5892                 printf("%04x ", eeprom_data);
5893         }
5894         printf("\n");
5895 }
5896
5897 static void
5898 igb_set_sysctl_value(struct adapter *adapter, const char *name,
5899         const char *description, int *limit, int value)
5900 {
5901         *limit = value;
5902         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5903             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5904             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5905 }
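
/*
 * Attach-time convenience wrapper: seeds *limit and exposes it as a
 * read/write integer node backed directly by that variable.  A call
 * of the form (illustrative names and value; the actual uses live in
 * igb_attach()):
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */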
5906
5907 /*
5908 ** Set flow control using sysctl:
5909 ** Flow control values:
5910 **      0 - off
5911 **      1 - rx pause
5912 **      2 - tx pause
5913 **      3 - full
5914 */
5915 static int
5916 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5917 {
5918         int             error, input;
5919         struct adapter  *adapter = (struct adapter *) arg1;
5920
5921         input = adapter->fc;    /* this unit's setting, not a shared static */
5922         error = sysctl_handle_int(oidp, &input, 0, req);
5923
5924         if ((error) || (req->newptr == NULL))
5925                 return (error);
5926
5927         switch (input) {
5928                 case e1000_fc_rx_pause:
5929                 case e1000_fc_tx_pause:
5930                 case e1000_fc_full:
5931                 case e1000_fc_none:
5932                         adapter->hw.fc.requested_mode = input;
5933                         adapter->fc = input;
5934                         break;
5935                 default:
5936                         /* Do nothing */
5937                         return (error);
5938         }
5939
5940         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5941         e1000_force_mac_fc(&adapter->hw);
5942         return (error);
5943 }
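
/*
 * Example, assuming the handler is attached to a read/write "fc" node
 * at attach time (outside this extent).  The e1000_fc_mode enum values
 * line up with the 0-3 scheme above, which is why the switch can match
 * on them directly:
 *
 *	# sysctl dev.igb.0.fc=3		(e1000_fc_full)
 *	# sysctl dev.igb.0.fc=0		(e1000_fc_none)
 */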
5944
5945 /*
5946 ** Manage DMA Coalesce:
5947 ** Control values:
5948 **      0/1 - off/on (1 selects the default of 1000)
5949 **      Legal timer values are:
5950 **      250, 500, and 1000-10000 in steps of 1000
5951 */
5952 static int
5953 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5954 {
5955         struct adapter *adapter = (struct adapter *) arg1;
5956         int             error;
5957
5958         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5959
5960         if ((error) || (req->newptr == NULL))
5961                 return (error);
5962
5963         switch (adapter->dmac) {
5964                 case 0:
5965                         /* Disabling */
5966                         break;
5967                 case 1: /* Just enable and use default */
5968                         adapter->dmac = 1000;
5969                         break;
5970                 case 250:
5971                 case 500:
5972                 case 1000:
5973                 case 2000:
5974                 case 3000:
5975                 case 4000:
5976                 case 5000:
5977                 case 6000:
5978                 case 7000:
5979                 case 8000:
5980                 case 9000:
5981                 case 10000:
5982                         /* Legal values - allow */
5983                         break;
5984                 default:
5985                         /* Do nothing, illegal value */
5986                         adapter->dmac = 0;
5987                         return (error);
5988         }
5989         /* Reinit the interface */
5990         igb_init(adapter);
5991         return (error);
5992 }
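
/*
 * Example, assuming a read/write "dmac" node registered at attach
 * time (outside this extent):
 *
 *	# sysctl dev.igb.0.dmac=1	(enable, falls back to 1000)
 *	# sysctl dev.igb.0.dmac=250	(shortest legal timer)
 *
 * Note that an illegal value zeroes adapter->dmac but returns before
 * igb_init(), so the previous hardware setting stays in effect until
 * the next reinit.
 */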
5993
5994 /*
5995 ** Manage Energy Efficient Ethernet:
5996 ** Control values:
5997 **     0 - EEE enabled, nonzero - EEE disabled
5998 */
5999 static int
6000 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6001 {
6002         struct adapter  *adapter = (struct adapter *) arg1;
6003         int             error, value;
6004
6005         value = adapter->hw.dev_spec._82575.eee_disable;
6006         error = sysctl_handle_int(oidp, &value, 0, req);
6007         if (error || req->newptr == NULL)
6008                 return (error);
6009         IGB_CORE_LOCK(adapter);
6010         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6011         igb_init_locked(adapter);
6012         IGB_CORE_UNLOCK(adapter);
6013         return (0);
6014 }
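
/*
 * Example, assuming an "eee_disabled" node registered at attach time
 * (outside this extent):
 *
 *	# sysctl dev.igb.0.eee_disabled=1
 *
 * The interface is reinitialized under the core lock, so the new
 * setting takes effect immediately.
 */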