/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifndef IGB_LEGACY_TX
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.4.0";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I354_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
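
/*
 * On FreeBSD this driver is built as the if_igb module; it can be
 * loaded at runtime with `kldload if_igb' or at boot by adding
 * if_igb_load="YES" to /boot/loader.conf.
 */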

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
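
/*
 * Example (values are illustrative only): the descriptor counts are
 * boot-time tunables, set in /boot/loader.conf before the driver
 * attaches, e.g.:
 *
 *      hw.igb.rxd=2048
 *      hw.igb.txd=2048
 *
 * Misaligned or out-of-range values are rejected in igb_attach() and
 * the IGB_DEFAULT_* counts are used instead.
 */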

/*
** AIM: Adaptive Interrupt Moderation.
** The interrupt rate is varied over time based on
** the traffic seen on each interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
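
/*
 * Since enable_aim is CTLFLAG_RW it can also be flipped on a running
 * system, e.g. `sysctl hw.igb.enable_aim=0' (value shown is only an
 * example).
 */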

/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
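
/* Illustrative: boot-time only (RDTUN), e.g. hw.igb.max_interrupt_rate=16000. */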

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
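
/* Illustrative: hw.igb.buf_ring_size=4096 in loader.conf (boot-time only). */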
#endif

/*
** Header split causes the packet header to
** be DMA'd into a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** and thus use no cluster. It is a very
** workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");
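
/* Illustrative: enable with hw.igb.hdr_split=1 in loader.conf (boot-time only). */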

/*
** If left at 0, this will autoconfigure based on
** the number of CPUs and the number of MSI-X
** messages the hardware supports.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");
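
/* Illustrative: pin the queue count with hw.igb.num_queues=4 in loader.conf. */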

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");
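
/* Illustrative: hw.igb.rx_process_limit=-1 in loader.conf removes the per-pass cap. */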

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on an
 *  adapter based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.
         * It must not exceed the hardware maximum and must be a
         * multiple of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw);
                        else
                                e1000_set_eee_i350(&adapter->hw);
                }
        }

        /*
        ** Start from a known state: this is important
        ** for reading the NVM and MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again.
                ** If it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-LAN
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack; it always uses tx[0] and spins for the lock.
 * Should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use */
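        /*
         * A valid flowid (e.g. an RSS hash computed by the NIC) pins
         * the flow to a single ring so packet order is preserved;
         * otherwise spread the load by the sending CPU.
         */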
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                err = igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
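        /*
         * drbr_peek() leaves the mbuf on the ring until igb_xmit()
         * accepts it; only then is it consumed with drbr_advance().
         * On failure the mbuf is either advanced past (if igb_xmit()
         * freed it) or put back for a later retry, so nothing is
         * lost when descriptors run out.
         */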
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
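                /*
                 * 9234 = 9216 bytes (9KB jumbo payload) plus
                 * ETHER_HDR_LEN (14) and ETHER_CRC_LEN (4); the
                 * largest frame this driver accepts.
                 */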
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
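                /* FALLTHROUGH */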
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address; the user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                if (adapter->hw.mac.type == e1000_i354)
                        e1000_set_eee_i354(&adapter->hw);
                else
                        e1000_set_eee_i350(&adapter->hw);
        }
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
        struct igb_queue *que = context;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct ifnet    *ifp = adapter->ifp;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                bool    more;

                more = igb_rxeof(que, adapter->rx_process_limit, NULL);

                IGB_TX_LOCK(txr);
                igb_txeof(txr);
#ifndef IGB_LEGACY_TX
                /* Process the stack queue only if not depleted */
                if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                    !drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr);
#else
                if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
                /* Do we need another? */
                if (more) {
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                return;
#endif
        /* Reenable this interrupt */
        if (que->eims)
                E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
        else
                igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
        struct adapter *adapter = context;

        IGB_CORE_LOCK(adapter);
        igb_handle_link_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_link_locked(struct adapter *adapter)
{
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK_ASSERT(adapter);
        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
        struct adapter          *adapter = arg;
        struct igb_queue        *que = adapter->queues;
1495         u32                     reg_icr;
1496
1497
1498         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1499
1500         /* Hot eject?  */
1501         if (reg_icr == 0xffffffff)
1502                 return FILTER_STRAY;
1503
1504         /* Definitely not our interrupt.  */
1505         if (reg_icr == 0x0)
1506                 return FILTER_STRAY;
1507
1508         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1509                 return FILTER_STRAY;
1510
1511         /*
1512          * Mask interrupts until the taskqueue is finished running.  This is
1513          * cheap, just assume that it is needed.  This also works around the
1514          * MSI message reordering errata on certain systems.
1515          */
1516         igb_disable_intr(adapter);
1517         taskqueue_enqueue(que->tq, &que->que_task);
1518
1519         /* Link status change */
1520         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1521                 taskqueue_enqueue(que->tq, &adapter->link_task);
1522
1523         if (reg_icr & E1000_ICR_RXO)
1524                 adapter->rx_overruns++;
1525         return FILTER_HANDLED;
1526 }
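
/*
** A note on the triage above (illustrative register values): reading
** 0xffffffff means the device is gone (hot eject) and the read returned
** bus garbage; zero on a shared legacy line means another device
** interrupted; and a clear INT_ASSERTED bit likewise is not ours.
** Only after those checks is the real work deferred to the taskqueue.
*/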
1527
1528 #ifdef DEVICE_POLLING
1529 #if __FreeBSD_version >= 800000
1530 #define POLL_RETURN_COUNT(a) (a)
1531 static int
1532 #else
1533 #define POLL_RETURN_COUNT(a)
1534 static void
1535 #endif
1536 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1537 {
1538         struct adapter          *adapter = ifp->if_softc;
1539         struct igb_queue        *que;
1540         struct tx_ring          *txr;
1541         u32                     reg_icr, rx_done = 0;
1542         u32                     loop = IGB_MAX_LOOP;
1543         bool                    more;
1544
1545         IGB_CORE_LOCK(adapter);
1546         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1547                 IGB_CORE_UNLOCK(adapter);
1548                 return POLL_RETURN_COUNT(rx_done);
1549         }
1550
1551         if (cmd == POLL_AND_CHECK_STATUS) {
1552                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1553                 /* Link status change */
1554                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1555                         igb_handle_link_locked(adapter);
1556
1557                 if (reg_icr & E1000_ICR_RXO)
1558                         adapter->rx_overruns++;
1559         }
1560         IGB_CORE_UNLOCK(adapter);
1561
1562         for (int i = 0; i < adapter->num_queues; i++) {
1563                 que = &adapter->queues[i];
1564                 txr = que->txr;
1565
1566                 igb_rxeof(que, count, &rx_done);
1567
1568                 IGB_TX_LOCK(txr);
1569                 do {
1570                         more = igb_txeof(txr);
1571                 } while (loop-- && more);
1572 #ifndef IGB_LEGACY_TX
1573                 if (!drbr_empty(ifp, txr->br))
1574                         igb_mq_start_locked(ifp, txr);
1575 #else
1576                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1577                         igb_start_locked(txr, ifp);
1578 #endif
1579                 IGB_TX_UNLOCK(txr);
1580         }
1581
1582         return POLL_RETURN_COUNT(rx_done);
1583 }
1584 #endif /* DEVICE_POLLING */
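
/*
** Usage sketch (assuming a kernel built with "options DEVICE_POLLING"):
** polling is toggled per interface from userland, e.g.
**
**	# ifconfig igb0 polling
**	# ifconfig igb0 -polling
**
** which flips IFCAP_POLLING in if_capenable and routes RX/TX work
** through igb_poll() instead of the interrupt handlers.
*/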
1585
1586 /*********************************************************************
1587  *
1588  *  MSIX Que Interrupt Service routine
1589  *
1590  **********************************************************************/
1591 static void
1592 igb_msix_que(void *arg)
1593 {
1594         struct igb_queue *que = arg;
1595         struct adapter *adapter = que->adapter;
1596         struct ifnet   *ifp = adapter->ifp;
1597         struct tx_ring *txr = que->txr;
1598         struct rx_ring *rxr = que->rxr;
1599         u32             newitr = 0;
1600         bool            more_rx;
1601
1602         /* Ignore spurious interrupts */
1603         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1604                 return;
1605
1606         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1607         ++que->irqs;
1608
1609         IGB_TX_LOCK(txr);
1610         igb_txeof(txr);
1611 #ifndef IGB_LEGACY_TX
1612         /* Process the stack queue only if not depleted */
1613         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1614             !drbr_empty(ifp, txr->br))
1615                 igb_mq_start_locked(ifp, txr);
1616 #else
1617         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1618                 igb_start_locked(txr, ifp);
1619 #endif
1620         IGB_TX_UNLOCK(txr);
1621
1622         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1623
1624         if (adapter->enable_aim == FALSE)
1625                 goto no_calc;
1626         /*
1627         ** Do Adaptive Interrupt Moderation:
1628         **  - Write out last calculated setting
1629         **  - Calculate based on average size over
1630         **    the last interval.
1631         */
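        /*
        ** Worked example (hypothetical interval): if the busier ring
        ** averaged 1500 bytes per packet at gigabit speed:
        **    1500 + 24       = 1524 (frame and crc overhead)
        **    min(1524, 3000) = 1524
        **    1524 is outside the 300-1200 mid range, so 1524 / 2 = 762
        **    762 & 0x7FFC    = 760, the value latched into EITR
        */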
1632         if (que->eitr_setting)
1633                 E1000_WRITE_REG(&adapter->hw,
1634                     E1000_EITR(que->msix), que->eitr_setting);
1635  
1636         que->eitr_setting = 0;
1637
1638         /* Idle, do nothing */
1639         if ((txr->bytes == 0) && (rxr->bytes == 0))
1640                 goto no_calc;
1641                                 
1642         /* Use half the default if below gigabit speed */
1643         if (adapter->link_speed != 1000)
1644                 newitr = IGB_DEFAULT_ITR / 2;
1645         else {
1646                 if ((txr->bytes) && (txr->packets))
1647                         newitr = txr->bytes/txr->packets;
1648                 if ((rxr->bytes) && (rxr->packets))
1649                         newitr = max(newitr,
1650                             (rxr->bytes / rxr->packets));
1651                 newitr += 24; /* account for hardware frame, crc */
1652                 /* set an upper boundary */
1653                 newitr = min(newitr, 3000);
1654                 /* Be nice to the mid range */
1655                 if ((newitr > 300) && (newitr < 1200))
1656                         newitr = (newitr / 3);
1657                 else
1658                         newitr = (newitr / 2);
1659         }
1660         newitr &= 0x7FFC;  /* Mask invalid bits */
1661         if (adapter->hw.mac.type == e1000_82575)
1662                 newitr |= newitr << 16;
1663         else
1664                 newitr |= E1000_EITR_CNT_IGNR;
1665                  
1666         /* save for next interrupt */
1667         que->eitr_setting = newitr;
1668
1669         /* Reset state */
1670         txr->bytes = 0;
1671         txr->packets = 0;
1672         rxr->bytes = 0;
1673         rxr->packets = 0;
1674
1675 no_calc:
1676         /* Schedule a clean task if needed */
1677         if (more_rx)
1678                 taskqueue_enqueue(que->tq, &que->que_task);
1679         else
1680                 /* Reenable this interrupt */
1681                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1682         return;
1683 }
1684
1685
1686 /*********************************************************************
1687  *
1688  *  MSIX Link Interrupt Service routine
1689  *
1690  **********************************************************************/
1691
1692 static void
1693 igb_msix_link(void *arg)
1694 {
1695         struct adapter  *adapter = arg;
1696         u32             icr;
1697
1698         ++adapter->link_irq;
1699         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1700         if (!(icr & E1000_ICR_LSC))
1701                 goto spurious;
1702         igb_handle_link(adapter, 0);
1703
1704 spurious:
1705         /* Rearm */
1706         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1707         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1708         return;
1709 }
1710
1711
1712 /*********************************************************************
1713  *
1714  *  Media Ioctl callback
1715  *
1716  *  This routine is called whenever the user queries the status of
1717  *  the interface using ifconfig.
1718  *
1719  **********************************************************************/
1720 static void
1721 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1722 {
1723         struct adapter *adapter = ifp->if_softc;
1724
1725         INIT_DEBUGOUT("igb_media_status: begin");
1726
1727         IGB_CORE_LOCK(adapter);
1728         igb_update_link_status(adapter);
1729
1730         ifmr->ifm_status = IFM_AVALID;
1731         ifmr->ifm_active = IFM_ETHER;
1732
1733         if (!adapter->link_active) {
1734                 IGB_CORE_UNLOCK(adapter);
1735                 return;
1736         }
1737
1738         ifmr->ifm_status |= IFM_ACTIVE;
1739
1740         switch (adapter->link_speed) {
1741         case 10:
1742                 ifmr->ifm_active |= IFM_10_T;
1743                 break;
1744         case 100:
1745                 /*
1746                 ** Support for 100Mb SFP - these are Fiber 
1747                 ** but the media type appears as serdes
1748                 */
1749                 if (adapter->hw.phy.media_type ==
1750                     e1000_media_type_internal_serdes)
1751                         ifmr->ifm_active |= IFM_100_FX;
1752                 else
1753                         ifmr->ifm_active |= IFM_100_TX;
1754                 break;
1755         case 1000:
1756                 ifmr->ifm_active |= IFM_1000_T;
1757                 break;
1758         case 2500:
1759                 ifmr->ifm_active |= IFM_2500_SX;
1760                 break;
1761         }
1762
1763         if (adapter->link_duplex == FULL_DUPLEX)
1764                 ifmr->ifm_active |= IFM_FDX;
1765         else
1766                 ifmr->ifm_active |= IFM_HDX;
1767
1768         IGB_CORE_UNLOCK(adapter);
1769 }
1770
1771 /*********************************************************************
1772  *
1773  *  Media Ioctl callback
1774  *
1775  *  This routine is called when the user changes speed/duplex using
1776  *  media/mediaopt option with ifconfig.
1777  *
1778  **********************************************************************/
1779 static int
1780 igb_media_change(struct ifnet *ifp)
1781 {
1782         struct adapter *adapter = ifp->if_softc;
1783         struct ifmedia  *ifm = &adapter->media;
1784
1785         INIT_DEBUGOUT("igb_media_change: begin");
1786
1787         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1788                 return (EINVAL);
1789
1790         IGB_CORE_LOCK(adapter);
1791         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1792         case IFM_AUTO:
1793                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1794                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1795                 break;
1796         case IFM_1000_LX:
1797         case IFM_1000_SX:
1798         case IFM_1000_T:
1799                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1800                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1801                 break;
1802         case IFM_100_TX:
1803                 adapter->hw.mac.autoneg = FALSE;
1804                 adapter->hw.phy.autoneg_advertised = 0;
1805                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1806                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1807                 else
1808                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1809                 break;
1810         case IFM_10_T:
1811                 adapter->hw.mac.autoneg = FALSE;
1812                 adapter->hw.phy.autoneg_advertised = 0;
1813                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1814                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1815                 else
1816                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1817                 break;
1818         default:
1819                 device_printf(adapter->dev, "Unsupported media type\n");
1820         }
1821
1822         igb_init_locked(adapter);
1823         IGB_CORE_UNLOCK(adapter);
1824
1825         return (0);
1826 }
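
/*
** Usage sketch: a forced-speed request such as
**
**	# ifconfig igb0 media 100baseTX mediaopt full-duplex
**
** arrives here with IFM_SUBTYPE() == IFM_100_TX and IFM_FDX set,
** turning autonegotiation off and forcing ADVERTISE_100_FULL before
** the reinit; "media autoselect" (IFM_AUTO) restores autoneg.
*/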
1827
1828
1829 /*********************************************************************
1830  *
1831  *  This routine maps the mbufs to Advanced TX descriptors.
1832  *  
1833  **********************************************************************/
1834 static int
1835 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1836 {
1837         struct adapter  *adapter = txr->adapter;
1838         u32             olinfo_status = 0, cmd_type_len;
1839         int             i, j, error, nsegs;
1840         int             first;
1841         bool            remap = TRUE;
1842         struct mbuf     *m_head;
1843         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1844         bus_dmamap_t    map;
1845         struct igb_tx_buf *txbuf;
1846         union e1000_adv_tx_desc *txd = NULL;
1847
1848         m_head = *m_headp;
1849
1850         /* Basic descriptor defines */
1851         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1852             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1853
1854         if (m_head->m_flags & M_VLANTAG)
1855                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1856
1857         /*
1858          * It is important to capture the first descriptor
1859          * used, because it holds the index of the one we
1860          * tell the hardware to report back on completion.
1861          */
1862         first = txr->next_avail_desc;
1863         txbuf = &txr->tx_buffers[first];
1864         map = txbuf->map;
1865
1866         /*
1867          * Map the packet for DMA.
1868          */
1869 retry:
1870         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1871             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1872
1873         if (__predict_false(error)) {
1874                 struct mbuf *m;
1875
1876                 switch (error) {
1877                 case EFBIG:
1878                         /* Try it again? - one try */
1879                         if (remap == TRUE) {
1880                                 remap = FALSE;
1881                                 m = m_defrag(*m_headp, M_NOWAIT);
1882                                 if (m == NULL) {
1883                                         adapter->mbuf_defrag_failed++;
1884                                         m_freem(*m_headp);
1885                                         *m_headp = NULL;
1886                                         return (ENOBUFS);
1887                                 }
1888                                 *m_headp = m;
1889                                 goto retry;
1890                         } else
1891                                 return (error);
1892                 case ENOMEM:
1893                         txr->no_tx_dma_setup++;
1894                         return (error);
1895                 default:
1896                         txr->no_tx_dma_setup++;
1897                         m_freem(*m_headp);
1898                         *m_headp = NULL;
1899                         return (error);
1900                 }
1901         }
1902
1903         /* Make certain there are enough descriptors */
1904         if (nsegs > txr->tx_avail - 2) {
1905                 txr->no_desc_avail++;
1906                 bus_dmamap_unload(txr->txtag, map);
1907                 return (ENOBUFS);
1908         }
1909         m_head = *m_headp;
1910
1911         /*
1912         ** Set up the appropriate offload context
1913         ** this will consume the first descriptor
1914         */
1915         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1916         if (__predict_false(error)) {
1917                 m_freem(*m_headp);
1918                 *m_headp = NULL;
1919                 return (error);
1920         }
1921
1922         /* 82575 needs the queue index added */
1923         if (adapter->hw.mac.type == e1000_82575)
1924                 olinfo_status |= txr->me << 4;
1925
1926         i = txr->next_avail_desc;
1927         for (j = 0; j < nsegs; j++) {
1928                 bus_size_t seglen;
1929                 bus_addr_t segaddr;
1930
1931                 txbuf = &txr->tx_buffers[i];
1932                 txd = &txr->tx_base[i];
1933                 seglen = segs[j].ds_len;
1934                 segaddr = htole64(segs[j].ds_addr);
1935
1936                 txd->read.buffer_addr = segaddr;
1937                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1938                     cmd_type_len | seglen);
1939                 txd->read.olinfo_status = htole32(olinfo_status);
1940
1941                 if (++i == txr->num_desc)
1942                         i = 0;
1943         }
1944
1945         txd->read.cmd_type_len |=
1946             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1947         txr->tx_avail -= nsegs;
1948         txr->next_avail_desc = i;
1949
1950         txbuf->m_head = m_head;
1951         /*
1952         ** Here we swap the maps so the last descriptor,
1953         ** which gets the completion interrupt, has the
1954         ** real map, and the first descriptor gets the
1955         ** unused map from this descriptor.
1956         */
1957         txr->tx_buffers[first].map = txbuf->map;
1958         txbuf->map = map;
1959         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1960
1961         /* Set the EOP descriptor that will be marked done */
1962         txbuf = &txr->tx_buffers[first];
1963         txbuf->eop = txd;
1964
1965         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1966             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1967         /*
1968          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1969          * hardware that this frame is available to transmit.
1970          */
1971         ++txr->total_packets;
1972         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1973
1974         return (0);
1975 }
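
/*
** Descriptor accounting sketch (illustrative): a TSO frame mapping to
** three DMA segments consumes one context descriptor (consumed in
** igb_tx_ctx_setup()) plus three data descriptors; only the last data
** descriptor carries EOP|RS, so one writeback covers the whole frame.
** The map swap leaves the loaded DMA map and the mbuf on that last
** buffer, while the first buffer keeps the eop pointer for igb_txeof().
*/
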
1976 static void
1977 igb_set_promisc(struct adapter *adapter)
1978 {
1979         struct ifnet    *ifp = adapter->ifp;
1980         struct e1000_hw *hw = &adapter->hw;
1981         u32             reg;
1982
1983         if (adapter->vf_ifp) {
1984                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1985                 return;
1986         }
1987
1988         reg = E1000_READ_REG(hw, E1000_RCTL);
1989         if (ifp->if_flags & IFF_PROMISC) {
1990                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1991                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1992         } else if (ifp->if_flags & IFF_ALLMULTI) {
1993                 reg |= E1000_RCTL_MPE;
1994                 reg &= ~E1000_RCTL_UPE;
1995                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1996         }
1997 }
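
/*
** Usage sketch: "ifconfig igb0 promisc" sets IFF_PROMISC, which enables
** both unicast (UPE) and multicast (MPE) promiscuous receive above;
** IFF_ALLMULTI (set by the stack, e.g. for multicast routing) enables
** only MPE.  A VF cannot write RCTL directly, so it requests the mode
** from the PF instead.
*/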
1998
1999 static void
2000 igb_disable_promisc(struct adapter *adapter)
2001 {
2002         struct e1000_hw *hw = &adapter->hw;
2003         struct ifnet    *ifp = adapter->ifp;
2004         u32             reg;
2005         int             mcnt = 0;
2006
2007         if (adapter->vf_ifp) {
2008                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2009                 return;
2010         }
2011         reg = E1000_READ_REG(hw, E1000_RCTL);
2012         reg &= ~E1000_RCTL_UPE;
2013         if (ifp->if_flags & IFF_ALLMULTI)
2014                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2015         else {
2016                 struct  ifmultiaddr *ifma;
2017 #if __FreeBSD_version < 800000
2018                 IF_ADDR_LOCK(ifp);
2019 #else   
2020                 if_maddr_rlock(ifp);
2021 #endif
2022                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2023                         if (ifma->ifma_addr->sa_family != AF_LINK)
2024                                 continue;
2025                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2026                                 break;
2027                         mcnt++;
2028                 }
2029 #if __FreeBSD_version < 800000
2030                 IF_ADDR_UNLOCK(ifp);
2031 #else
2032                 if_maddr_runlock(ifp);
2033 #endif
2034         }
2035         /* Leave MPE set while we are at the multicast group limit */
2036         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2037                 reg &= ~E1000_RCTL_MPE;
2038         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2039 }
2040
2041
2042 /*********************************************************************
2043  *  Multicast Update
2044  *
2045  *  This routine is called whenever multicast address list is updated.
2046  *
2047  **********************************************************************/
2048
2049 static void
2050 igb_set_multi(struct adapter *adapter)
2051 {
2052         struct ifnet    *ifp = adapter->ifp;
2053         struct ifmultiaddr *ifma;
2054         u32 reg_rctl = 0;
2055         u8  *mta;
2056
2057         int mcnt = 0;
2058
2059         IOCTL_DEBUGOUT("igb_set_multi: begin");
2060
2061         mta = adapter->mta;
2062         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2063             MAX_NUM_MULTICAST_ADDRESSES);
2064
2065 #if __FreeBSD_version < 800000
2066         IF_ADDR_LOCK(ifp);
2067 #else
2068         if_maddr_rlock(ifp);
2069 #endif
2070         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2071                 if (ifma->ifma_addr->sa_family != AF_LINK)
2072                         continue;
2073
2074                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2075                         break;
2076
2077                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2078                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2079                 mcnt++;
2080         }
2081 #if __FreeBSD_version < 800000
2082         IF_ADDR_UNLOCK(ifp);
2083 #else
2084         if_maddr_runlock(ifp);
2085 #endif
2086
2087         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2088                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2089                 reg_rctl |= E1000_RCTL_MPE;
2090                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2091         } else
2092                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2093 }
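
/*
** Layout sketch: mta is a flat array of 6-byte (ETH_ADDR_LEN) entries,
** so three joined groups occupy bytes 0-17 with mcnt == 3, and
** e1000_update_mc_addr_list() programs those mcnt entries into the
** hardware multicast filter.  Overflowing MAX_NUM_MULTICAST_ADDRESSES
** falls back to multicast promiscuous (MPE) rather than dropping groups.
*/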
2094
2095
2096 /*********************************************************************
2097  *  Timer routine:
2098  *      This routine checks for link status,
2099  *      updates statistics, and does the watchdog.
2100  *
2101  **********************************************************************/
2102
2103 static void
2104 igb_local_timer(void *arg)
2105 {
2106         struct adapter          *adapter = arg;
2107         device_t                dev = adapter->dev;
2108         struct ifnet            *ifp = adapter->ifp;
2109         struct tx_ring          *txr = adapter->tx_rings;
2110         struct igb_queue        *que = adapter->queues;
2111         int                     hung = 0, busy = 0;
2112
2113
2114         IGB_CORE_LOCK_ASSERT(adapter);
2115
2116         igb_update_link_status(adapter);
2117         igb_update_stats_counters(adapter);
2118
2119         /*
2120         ** Check the TX queues status
2121         **      - central locked handling of OACTIVE
2122         **      - watchdog only if all queues show hung
2123         */
2124         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2125                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2126                     (adapter->pause_frames == 0))
2127                         ++hung;
2128                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2129                         ++busy;
2130                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2131                         taskqueue_enqueue(que->tq, &que->que_task);
2132         }
2133         if (hung == adapter->num_queues)
2134                 goto timeout;
2135         if (busy == adapter->num_queues)
2136                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2137         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2138             (busy < adapter->num_queues))
2139                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2140
2141         adapter->pause_frames = 0;
2142         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2143 #ifndef DEVICE_POLLING
2144         /* Schedule all queue interrupts - deadlock protection */
2145         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2146 #endif
2147         return;
2148
2149 timeout:
2150         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2151         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2152             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2153             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2154         device_printf(dev, "TX(%d) desc avail = %d, "
2155             "Next TX to Clean = %d\n",
2156             txr->me, txr->tx_avail, txr->next_to_clean);
2157         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2158         adapter->watchdog_events++;
2159         igb_init_locked(adapter);
2160 }
2161
2162 static void
2163 igb_update_link_status(struct adapter *adapter)
2164 {
2165         struct e1000_hw         *hw = &adapter->hw;
2166         struct e1000_fc_info    *fc = &hw->fc;
2167         struct ifnet            *ifp = adapter->ifp;
2168         device_t                dev = adapter->dev;
2169         struct tx_ring          *txr = adapter->tx_rings;
2170         u32                     link_check, thstat, ctrl;
2171         char                    *flowctl = NULL;
2172
2173         link_check = thstat = ctrl = 0;
2174
2175         /* Get the cached link value or read for real */
2176         switch (hw->phy.media_type) {
2177         case e1000_media_type_copper:
2178                 if (hw->mac.get_link_status) {
2179                         /* Do the work to read phy */
2180                         e1000_check_for_link(hw);
2181                         link_check = !hw->mac.get_link_status;
2182                 } else
2183                         link_check = TRUE;
2184                 break;
2185         case e1000_media_type_fiber:
2186                 e1000_check_for_link(hw);
2187                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2188                                  E1000_STATUS_LU);
2189                 break;
2190         case e1000_media_type_internal_serdes:
2191                 e1000_check_for_link(hw);
2192                 link_check = adapter->hw.mac.serdes_has_link;
2193                 break;
2194         /* VF device is type_unknown */
2195         case e1000_media_type_unknown:
2196                 e1000_check_for_link(hw);
2197                 link_check = !hw->mac.get_link_status;
2198                 /* Fall thru */
2199         default:
2200                 break;
2201         }
2202
2203         /* Check for thermal downshift or shutdown */
2204         if (hw->mac.type == e1000_i350) {
2205                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2206                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2207         }
2208
2209         /* Get the flow control for display */
2210         switch (fc->current_mode) {
2211         case e1000_fc_rx_pause:
2212                 flowctl = "RX";
2213                 break;  
2214         case e1000_fc_tx_pause:
2215                 flowctl = "TX";
2216                 break;  
2217         case e1000_fc_full:
2218                 flowctl = "Full";
2219                 break;  
2220         case e1000_fc_none:
2221         default:
2222                 flowctl = "None";
2223                 break;  
2224         }
2225
2226         /* Now we check if a transition has happened */
2227         if (link_check && (adapter->link_active == 0)) {
2228                 e1000_get_speed_and_duplex(&adapter->hw, 
2229                     &adapter->link_speed, &adapter->link_duplex);
2230                 if (bootverbose)
2231                         device_printf(dev, "Link is up %d Mbps %s,"
2232                             " Flow Control: %s\n",
2233                             adapter->link_speed,
2234                             ((adapter->link_duplex == FULL_DUPLEX) ?
2235                             "Full Duplex" : "Half Duplex"), flowctl);
2236                 adapter->link_active = 1;
2237                 ifp->if_baudrate = adapter->link_speed * 1000000;
2238                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2239                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2240                         device_printf(dev, "Link: thermal downshift\n");
2241                 /* Delay Link Up for Phy update */
2242                 if (((hw->mac.type == e1000_i210) ||
2243                     (hw->mac.type == e1000_i211)) &&
2244                     (hw->phy.id == I210_I_PHY_ID))
2245                         msec_delay(I210_LINK_DELAY);
2246                 /* Reset if the media type changed. */
2247                 if (hw->dev_spec._82575.media_changed) {
2248                         hw->dev_spec._82575.media_changed = false;
2249                         adapter->flags |= IGB_MEDIA_RESET;
2250                         igb_reset(adapter);
2251                 }       
2252                 /* This can sleep */
2253                 if_link_state_change(ifp, LINK_STATE_UP);
2254         } else if (!link_check && (adapter->link_active == 1)) {
2255                 ifp->if_baudrate = adapter->link_speed = 0;
2256                 adapter->link_duplex = 0;
2257                 if (bootverbose)
2258                         device_printf(dev, "Link is Down\n");
2259                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2260                     (thstat & E1000_THSTAT_PWR_DOWN))
2261                         device_printf(dev, "Link: thermal shutdown\n");
2262                 adapter->link_active = 0;
2263                 /* This can sleep */
2264                 if_link_state_change(ifp, LINK_STATE_DOWN);
2265                 /* Reset queue state */
2266                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2267                         txr->queue_status = IGB_QUEUE_IDLE;
2268         }
2269 }
2270
2271 /*********************************************************************
2272  *
2273  *  This routine disables all traffic on the adapter by issuing a
2274  *  global reset on the MAC and deallocates TX/RX buffers.
2275  *
2276  **********************************************************************/
2277
2278 static void
2279 igb_stop(void *arg)
2280 {
2281         struct adapter  *adapter = arg;
2282         struct ifnet    *ifp = adapter->ifp;
2283         struct tx_ring *txr = adapter->tx_rings;
2284
2285         IGB_CORE_LOCK_ASSERT(adapter);
2286
2287         INIT_DEBUGOUT("igb_stop: begin");
2288
2289         igb_disable_intr(adapter);
2290
2291         callout_stop(&adapter->timer);
2292
2293         /* Tell the stack that the interface is no longer active */
2294         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2295         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2296
2297         /* Disarm watchdog timer. */
2298         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2299                 IGB_TX_LOCK(txr);
2300                 txr->queue_status = IGB_QUEUE_IDLE;
2301                 IGB_TX_UNLOCK(txr);
2302         }
2303
2304         e1000_reset_hw(&adapter->hw);
2305         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2306
2307         e1000_led_off(&adapter->hw);
2308         e1000_cleanup_led(&adapter->hw);
2309 }
2310
2311
2312 /*********************************************************************
2313  *
2314  *  Determine hardware revision.
2315  *
2316  **********************************************************************/
2317 static void
2318 igb_identify_hardware(struct adapter *adapter)
2319 {
2320         device_t dev = adapter->dev;
2321
2322         /* Make sure our PCI config space has the necessary stuff set */
2323         pci_enable_busmaster(dev);
2324         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2325
2326         /* Save off the information about this board */
2327         adapter->hw.vendor_id = pci_get_vendor(dev);
2328         adapter->hw.device_id = pci_get_device(dev);
2329         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2330         adapter->hw.subsystem_vendor_id =
2331             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2332         adapter->hw.subsystem_device_id =
2333             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2334
2335         /* Set MAC type early for PCI setup */
2336         e1000_set_mac_type(&adapter->hw);
2337
2338         /* Are we a VF device? */
2339         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2340             (adapter->hw.mac.type == e1000_vfadapt_i350))
2341                 adapter->vf_ifp = 1;
2342         else
2343                 adapter->vf_ifp = 0;
2344 }
2345
2346 static int
2347 igb_allocate_pci_resources(struct adapter *adapter)
2348 {
2349         device_t        dev = adapter->dev;
2350         int             rid;
2351
2352         rid = PCIR_BAR(0);
2353         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2354             &rid, RF_ACTIVE);
2355         if (adapter->pci_mem == NULL) {
2356                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2357                 return (ENXIO);
2358         }
2359         adapter->osdep.mem_bus_space_tag =
2360             rman_get_bustag(adapter->pci_mem);
2361         adapter->osdep.mem_bus_space_handle =
2362             rman_get_bushandle(adapter->pci_mem);
2363         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2364
2365         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2366
2367         /* This will setup either MSI/X or MSI */
2368         adapter->msix = igb_setup_msix(adapter);
2369         adapter->hw.back = &adapter->osdep;
2370
2371         return (0);
2372 }
2373
2374 /*********************************************************************
2375  *
2376  *  Setup the Legacy or MSI Interrupt handler
2377  *
2378  **********************************************************************/
2379 static int
2380 igb_allocate_legacy(struct adapter *adapter)
2381 {
2382         device_t                dev = adapter->dev;
2383         struct igb_queue        *que = adapter->queues;
2384 #ifndef IGB_LEGACY_TX
2385         struct tx_ring          *txr = adapter->tx_rings;
2386 #endif
2387         int                     error, rid = 0;
2388
2389         /* Turn off all interrupts */
2390         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2391
2392         /* MSI RID is 1 */
2393         if (adapter->msix == 1)
2394                 rid = 1;
2395
2396         /* We allocate a single interrupt resource */
2397         adapter->res = bus_alloc_resource_any(dev,
2398             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2399         if (adapter->res == NULL) {
2400                 device_printf(dev, "Unable to allocate bus resource: "
2401                     "interrupt\n");
2402                 return (ENXIO);
2403         }
2404
2405 #ifndef IGB_LEGACY_TX
2406         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2407 #endif
2408
2409         /*
2410          * Try allocating a fast interrupt and the associated deferred
2411          * processing contexts.
2412          */
2413         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2414         /* Make tasklet for deferred link handling */
2415         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2416         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2417             taskqueue_thread_enqueue, &que->tq);
2418         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2419             device_get_nameunit(adapter->dev));
2420         if ((error = bus_setup_intr(dev, adapter->res,
2421             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2422             adapter, &adapter->tag)) != 0) {
2423                 device_printf(dev, "Failed to register fast interrupt "
2424                             "handler: %d\n", error);
2425                 taskqueue_free(que->tq);
2426                 que->tq = NULL;
2427                 return (error);
2428         }
2429
2430         return (0);
2431 }
2432
2433
2434 /*********************************************************************
2435  *
2436  *  Setup the MSIX Queue Interrupt handlers: 
2437  *
2438  **********************************************************************/
2439 static int
2440 igb_allocate_msix(struct adapter *adapter)
2441 {
2442         device_t                dev = adapter->dev;
2443         struct igb_queue        *que = adapter->queues;
2444         int                     error, rid, vector = 0;
2445
2446         /* Be sure to start with all interrupts disabled */
2447         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2448         E1000_WRITE_FLUSH(&adapter->hw);
2449
2450         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2451                 rid = vector + 1;
2452                 que->res = bus_alloc_resource_any(dev,
2453                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2454                 if (que->res == NULL) {
2455                         device_printf(dev,
2456                             "Unable to allocate bus resource: "
2457                             "MSIX Queue Interrupt\n");
2458                         return (ENXIO);
2459                 }
2460                 error = bus_setup_intr(dev, que->res,
2461                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2462                     igb_msix_que, que, &que->tag);
2463                 if (error) {
2464                         que->res = NULL;
2465                         device_printf(dev, "Failed to register Queue handler\n");
2466                         return (error);
2467                 }
2468 #if __FreeBSD_version >= 800504
2469                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2470 #endif
2471                 que->msix = vector;
2472                 if (adapter->hw.mac.type == e1000_82575)
2473                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2474                 else
2475                         que->eims = 1 << vector;
2476                 /*
2477                 ** Bind the msix vector, and thus the
2478                 ** rings to the corresponding cpu.
2479                 */
2480                 if (adapter->num_queues > 1) {
2481                         if (igb_last_bind_cpu < 0)
2482                                 igb_last_bind_cpu = CPU_FIRST();
2483                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2484                         device_printf(dev,
2485                                 "Bound queue %d to cpu %d\n",
2486                                 i, igb_last_bind_cpu);
2487                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2488                 }
2489 #ifndef IGB_LEGACY_TX
2490                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2491                     que->txr);
2492 #endif
2493                 /* Make tasklet for deferred handling */
2494                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2495                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2496                     taskqueue_thread_enqueue, &que->tq);
2497                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2498                     device_get_nameunit(adapter->dev));
2499         }
2500
2501         /* And Link */
2502         rid = vector + 1;
2503         adapter->res = bus_alloc_resource_any(dev,
2504             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2505         if (adapter->res == NULL) {
2506                 device_printf(dev,
2507                     "Unable to allocate bus resource: "
2508                     "MSIX Link Interrupt\n");
2509                 return (ENXIO);
2510         }
2511         if ((error = bus_setup_intr(dev, adapter->res,
2512             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2513             igb_msix_link, adapter, &adapter->tag)) != 0) {
2514                 device_printf(dev, "Failed to register Link handler\n");
2515                 return (error);
2516         }
2517 #if __FreeBSD_version >= 800504
2518         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2519 #endif
2520         adapter->linkvec = vector;
2521
2522         return (0);
2523 }
2524
2525
2526 static void
2527 igb_configure_queues(struct adapter *adapter)
2528 {
2529         struct  e1000_hw        *hw = &adapter->hw;
2530         struct  igb_queue       *que;
2531         u32                     tmp, ivar = 0, newitr = 0;
2532
2533         /* First turn on RSS capability */
2534         if (adapter->hw.mac.type != e1000_82575)
2535                 E1000_WRITE_REG(hw, E1000_GPIE,
2536                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2537                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2538
2539         /* Turn on MSIX */
2540         switch (adapter->hw.mac.type) {
2541         case e1000_82580:
2542         case e1000_i350:
2543         case e1000_i354:
2544         case e1000_i210:
2545         case e1000_i211:
2546         case e1000_vfadapt:
2547         case e1000_vfadapt_i350:
2548                 /* RX entries */
2549                 for (int i = 0; i < adapter->num_queues; i++) {
2550                         u32 index = i >> 1;
2551                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2552                         que = &adapter->queues[i];
2553                         if (i & 1) {
2554                                 ivar &= 0xFF00FFFF;
2555                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2556                         } else {
2557                                 ivar &= 0xFFFFFF00;
2558                                 ivar |= que->msix | E1000_IVAR_VALID;
2559                         }
2560                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2561                 }
2562                 /* TX entries */
2563                 for (int i = 0; i < adapter->num_queues; i++) {
2564                         u32 index = i >> 1;
2565                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2566                         que = &adapter->queues[i];
2567                         if (i & 1) {
2568                                 ivar &= 0x00FFFFFF;
2569                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2570                         } else {
2571                                 ivar &= 0xFFFF00FF;
2572                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2573                         }
2574                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2575                         adapter->que_mask |= que->eims;
2576                 }
2577
2578                 /* And for the link interrupt */
2579                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2580                 adapter->link_mask = 1 << adapter->linkvec;
2581                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2582                 break;
2583         case e1000_82576:
2584                 /* RX entries */
2585                 for (int i = 0; i < adapter->num_queues; i++) {
2586                         u32 index = i & 0x7; /* Each IVAR has two entries */
2587                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2588                         que = &adapter->queues[i];
2589                         if (i < 8) {
2590                                 ivar &= 0xFFFFFF00;
2591                                 ivar |= que->msix | E1000_IVAR_VALID;
2592                         } else {
2593                                 ivar &= 0xFF00FFFF;
2594                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2595                         }
2596                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2597                         adapter->que_mask |= que->eims;
2598                 }
2599                 /* TX entries */
2600                 for (int i = 0; i < adapter->num_queues; i++) {
2601                         u32 index = i & 0x7; /* Each IVAR has two entries */
2602                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2603                         que = &adapter->queues[i];
2604                         if (i < 8) {
2605                                 ivar &= 0xFFFF00FF;
2606                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2607                         } else {
2608                                 ivar &= 0x00FFFFFF;
2609                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2610                         }
2611                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2612                         adapter->que_mask |= que->eims;
2613                 }
2614
2615                 /* And for the link interrupt */
2616                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2617                 adapter->link_mask = 1 << adapter->linkvec;
2618                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2619                 break;
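
        /*
        ** IVAR layout sketch for the 82576 path above: each 32-bit IVAR
        ** register carries four 8-bit entries, so with index = i & 0x7,
        ** IVAR[n] holds { RX queue n, TX queue n, RX queue n+8,
        ** TX queue n+8 } in bytes 0-3.  For example, RX queue 9 programs
        ** bits 23:16 of IVAR1; each entry is the MSIX vector number OR'd
        ** with E1000_IVAR_VALID.
        */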
2620
2621         case e1000_82575:
2622                 /* enable MSI-X support*/
2623                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2624                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2625                 /* Auto-Mask interrupts upon ICR read. */
2626                 tmp |= E1000_CTRL_EXT_EIAME;
2627                 tmp |= E1000_CTRL_EXT_IRCA;
2628                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2629
2630                 /* Queues */
2631                 for (int i = 0; i < adapter->num_queues; i++) {
2632                         que = &adapter->queues[i];
2633                         tmp = E1000_EICR_RX_QUEUE0 << i;
2634                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2635                         que->eims = tmp;
2636                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2637                             i, que->eims);
2638                         adapter->que_mask |= que->eims;
2639                 }
2640
2641                 /* Link */
2642                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2643                     E1000_EIMS_OTHER);
2644                 adapter->link_mask |= E1000_EIMS_OTHER;
2645         default:
2646                 break;
2647         }
2648
2649         /* Set the starting interrupt rate */
2650         if (igb_max_interrupt_rate > 0)
2651                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
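        /*
        ** Example (hypothetical tunable value): an igb_max_interrupt_rate
        ** of 8000 interrupts/sec gives newitr = 4000000 / 8000 = 500,
        ** i.e. an EITR interval of ~125 usec assuming the ~0.25 usec
        ** counter granularity implied by the 4000000 constant.
        */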
2652
2653         if (hw->mac.type == e1000_82575)
2654                 newitr |= newitr << 16;
2655         else
2656                 newitr |= E1000_EITR_CNT_IGNR;
2657
2658         for (int i = 0; i < adapter->num_queues; i++) {
2659                 que = &adapter->queues[i];
2660                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2661         }
2662
2663         return;
2664 }
2665
2666
2667 static void
2668 igb_free_pci_resources(struct adapter *adapter)
2669 {
2670         struct          igb_queue *que = adapter->queues;
2671         device_t        dev = adapter->dev;
2672         int             rid;
2673
2674         /*
2675         ** There is a slight possibility of a failure mode
2676         ** in attach that will result in entering this function
2677         ** before the interrupt resources have been initialized,
2678         ** and in that case we do not want to execute the loops
2679         ** below.  We can detect this reliably by the state of
2680         ** the adapter res pointer.
2681         */
2682         if (adapter->res == NULL)
2683                 goto mem;
2684
2685         /*
2686          * First release all the interrupt resources:
2687          */
2688         for (int i = 0; i < adapter->num_queues; i++, que++) {
2689                 rid = que->msix + 1;
2690                 if (que->tag != NULL) {
2691                         bus_teardown_intr(dev, que->res, que->tag);
2692                         que->tag = NULL;
2693                 }
2694                 if (que->res != NULL)
2695                         bus_release_resource(dev,
2696                             SYS_RES_IRQ, rid, que->res);
2697         }
2698
2699         /* Clean the Legacy or Link interrupt last */
2700         if (adapter->linkvec) /* we are doing MSIX */
2701                 rid = adapter->linkvec + 1;
2702         else
2703                 rid = (adapter->msix != 0) ? 1 : 0;
2704
2705         que = adapter->queues;
2706         if (adapter->tag != NULL) {
2707                 taskqueue_drain(que->tq, &adapter->link_task);
2708                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2709                 adapter->tag = NULL;
2710         }
2711         if (adapter->res != NULL)
2712                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2713
2714         for (int i = 0; i < adapter->num_queues; i++, que++) {
2715                 if (que->tq != NULL) {
2716 #ifndef IGB_LEGACY_TX
2717                         taskqueue_drain(que->tq, &que->txr->txq_task);
2718 #endif
2719                         taskqueue_drain(que->tq, &que->que_task);
2720                         taskqueue_free(que->tq);
2721                 }
2722         }
2723 mem:
2724         if (adapter->msix)
2725                 pci_release_msi(dev);
2726
2727         if (adapter->msix_mem != NULL)
2728                 bus_release_resource(dev, SYS_RES_MEMORY,
2729                     adapter->memrid, adapter->msix_mem);
2730
2731         if (adapter->pci_mem != NULL)
2732                 bus_release_resource(dev, SYS_RES_MEMORY,
2733                     PCIR_BAR(0), adapter->pci_mem);
2734
2735 }
2736
2737 /*
2738  * Setup Either MSI/X or MSI
2739  */
2740 static int
2741 igb_setup_msix(struct adapter *adapter)
2742 {
2743         device_t        dev = adapter->dev;
2744         int             bar, want, queues, msgs, maxqueues;
2745
2746         /* tuneable override */
2747         if (igb_enable_msix == 0)
2748                 goto msi;
2749
2750         /* First try MSI/X */
2751         msgs = pci_msix_count(dev); 
2752         if (msgs == 0)
2753                 goto msi;
2754         /*
2755         ** Some newer devices, as with ixgbe, may use
2756         ** a different BAR, so we need to keep track
2757         ** of which one is used.
2758         */
2759         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2760         bar = pci_read_config(dev, adapter->memrid, 4);
2761         if (bar == 0) /* use next bar */
2762                 adapter->memrid += 4;
2763         adapter->msix_mem = bus_alloc_resource_any(dev,
2764             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2765         if (adapter->msix_mem == NULL) {
2766                 /* May not be enabled */
2767                 device_printf(adapter->dev,
2768                     "Unable to map MSIX table\n");
2769                 goto msi;
2770         }
2771
2772         /* Figure out a reasonable auto config value */
2773         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2774
2778
2779         /* Sanity check based on HW */
2780         switch (adapter->hw.mac.type) {
2781                 case e1000_82575:
2782                         maxqueues = 4;
2783                         break;
2784                 case e1000_82576:
2785                 case e1000_82580:
2786                 case e1000_i350:
2787                 case e1000_i354:
2788                         maxqueues = 8;
2789                         break;
2790                 case e1000_i210:
2791                         maxqueues = 4;
2792                         break;
2793                 case e1000_i211:
2794                         maxqueues = 2;
2795                         break;
2796                 default:  /* VF interfaces */
2797                         maxqueues = 1;
2798                         break;
2799         }
2800         if (queues > maxqueues)
2801                 queues = maxqueues;
2802
2803         /* Manual override (a user-set igb_num_queues always wins) */
2804         if (igb_num_queues != 0)
2805                 queues = igb_num_queues;
2806
2807         /*
2808         ** One vector (RX/TX pair) per queue
2809         ** plus an additional for Link interrupt
2810         */
2811         want = queues + 1;
2812         if (msgs >= want)
2813                 msgs = want;
2814         else {
2815                 device_printf(adapter->dev,
2816                     "MSIX Configuration Problem, "
2817                     "%d vectors configured, but %d queues wanted!\n",
2818                     msgs, want);
2819                 goto msi;
2820         }
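        /*
        ** Vector budgeting example (hypothetical box): 8 CPUs on an
        ** 82576 (maxqueues 8) with 10 MSIX messages available yields
        ** queues = 8 and want = 9 (one per queue pair plus link), so
        ** msgs is trimmed to 9 before pci_alloc_msix() is called.
        */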
2821         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2822                 device_printf(adapter->dev,
2823                     "Using MSIX interrupts with %d vectors\n", msgs);
2824                 adapter->num_queues = queues;
2825                 return (msgs);
2826         }
2827         /*
2828         ** If MSIX alloc failed or provided us with
2829         ** less than needed, free and fall through to MSI
2830         */
2831         pci_release_msi(dev);
2832
2833 msi:
2834         if (adapter->msix_mem != NULL) {
2835                 bus_release_resource(dev, SYS_RES_MEMORY,
2836                     adapter->memrid, adapter->msix_mem);
2837                 adapter->msix_mem = NULL;
2838         }
2839         msgs = 1;
2840         if (pci_alloc_msi(dev, &msgs) == 0) {
2841                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2842                 return (msgs);
2843         }
2844         device_printf(adapter->dev, "Using a Legacy interrupt\n");
2845         return (0);
2846 }
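
/*
** A worked example of the vector math above (the core and
** vector counts are hypothetical): on an 8-core machine where
** pci_msix_count() reports 10 vectors, an 82576 (maxqueues = 8)
** yields
**
**	queues = min(mp_ncpus, msgs - 1) = min(8, 9) = 8
**	want   = queues + 1 = 9   (one RX/TX pair per queue + link)
**
** so pci_alloc_msix() is asked for 9 vectors and the driver runs
** 8 queues. With only 5 vectors available the same math gives
** 4 queues and 5 vectors.
*/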
2847
2848 /*********************************************************************
2849  *
2850  *  Initialize the DMA Coalescing feature
2851  *
2852  **********************************************************************/
2853 static void
2854 igb_init_dmac(struct adapter *adapter, u32 pba)
2855 {
2856         device_t        dev = adapter->dev;
2857         struct e1000_hw *hw = &adapter->hw;
2858         u32             dmac, reg = ~E1000_DMACR_DMAC_EN;
2859         u16             hwm;
2860
2861         if (hw->mac.type == e1000_i211)
2862                 return;
2863
2864         if (hw->mac.type > e1000_82580) {
2865
2866                 if (adapter->dmac == 0) { /* Disabling it */
2867                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2868                         return;
2869                 } else
2870                         device_printf(dev, "DMA Coalescing enabled\n");
2871
2872                 /* Set starting threshold */
2873                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2874
2875                 hwm = 64 * pba - adapter->max_frame_size / 16;
2876                 if (hwm < 64 * (pba - 6))
2877                         hwm = 64 * (pba - 6);
2878                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2879                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2880                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2881                     & E1000_FCRTC_RTH_COAL_MASK);
2882                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2883
2884
2885                 dmac = pba - adapter->max_frame_size / 512;
2886                 if (dmac < pba - 10)
2887                         dmac = pba - 10;
2888                 reg = E1000_READ_REG(hw, E1000_DMACR);
2889                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2890                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2891                     & E1000_DMACR_DMACTHR_MASK);
2892
2893                 /* transition to L0s or L1 if available */
2894                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2895
2896                 /* Check for a 2.5Gb backplane connection before
2897                 * configuring the watchdog timer: on a 2.5Gb link
2898                 * the timer field counts in 12.8 usec units, while
2899                 * on other links it counts in 32 usec units, so
2900                 * the same interval needs different scaling below
2901                 */
2902                 if (hw->mac.type == e1000_i354) {
2903                         int status = E1000_READ_REG(hw, E1000_STATUS);
2904                         if ((status & E1000_STATUS_2P5_SKU) &&
2905                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2906                                 reg |= ((adapter->dmac * 5) >> 6);
2907                         else
2908                                 reg |= (adapter->dmac >> 5);
2909                 } else {
2910                         reg |= (adapter->dmac >> 5);
2911                 }
2912
2913                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2914
2915 #ifdef I210_OBFF_SUPPORT
2916                 /*
2917                  * Set the OBFF Rx threshold to DMA Coalescing Rx
2918                  * threshold - 2KB and enable the feature in the
2919                  * hardware for I210.
2920                  */
2921                 if (hw->mac.type == e1000_i210) {
2922                         int obff = dmac - 2;
2923                         reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
2924                         reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
2925                         reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
2926                             | E1000_DOBFFCTL_EXIT_ACT_MASK;
2927                         E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
2928                 }
2929 #endif
2930                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2931
2932                 /* Set the interval before transition */
2933                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2934                 if (hw->mac.type == e1000_i350)
2935                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2936                 /*
2937                 ** On a 2.5Gb connection the TTLX unit is 0.4 usec, so
2938                 ** a 4 usec delay needs 0xA (10 * 0.4us) instead of 0x4
2939                 */
2940                 if (hw->mac.type == e1000_i354) {
2941                         int status = E1000_READ_REG(hw, E1000_STATUS);
2942                         if ((status & E1000_STATUS_2P5_SKU) &&
2943                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2944                                 reg |= 0xA;
2945                         else
2946                                 reg |= 0x4;
2947                 } else {
2948                         reg |= 0x4;
2949                 }
2950
2951                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2952
2953                 /* Tx packet buffer free space needed to wake from DMA coalescing */
2954                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2955                     (2 * adapter->max_frame_size)) >> 6);
2956
2957                 /* let DMA coalescing control the low power state decision */
2958                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2959                 reg &= ~E1000_PCIEMISC_LX_DECISION;
2960                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2961
2962         } else if (hw->mac.type == e1000_82580) {
2963                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2964                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2965                     reg & ~E1000_PCIEMISC_LX_DECISION);
2966                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2967         }
2968 }
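
/*
** A worked example of the watchdog scaling in igb_init_dmac()
** (hypothetical value, assuming the dmac tunable is given in
** microseconds, as the shifts suggest): with dmac = 1000,
**
**	normal link:   1000 >> 5      = 31  (31 * 32us   ~= 992us)
**	2.5Gb (i354): (1000 * 5) >> 6 = 78  (78 * 12.8us ~= 998us)
**
** i.e. both encodings approximate the same ~1ms watchdog; only
** the register's time base differs.
*/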
2969
2970
2971 /*********************************************************************
2972  *
2973  *  Set up a fresh starting state
2974  *
2975  **********************************************************************/
2976 static void
2977 igb_reset(struct adapter *adapter)
2978 {
2979         device_t        dev = adapter->dev;
2980         struct e1000_hw *hw = &adapter->hw;
2981         struct e1000_fc_info *fc = &hw->fc;
2982         struct ifnet    *ifp = adapter->ifp;
2983         u32             pba = 0;
2984         u16             hwm;
2985
2986         INIT_DEBUGOUT("igb_reset: begin");
2987
2988         /* Let the firmware know the OS is in control */
2989         igb_get_hw_control(adapter);
2990
2991         /*
2992          * Packet Buffer Allocation (PBA)
2993          * Writing PBA sets the receive portion of the buffer;
2994          * the remainder is used for the transmit buffer.
2995          */
2996         switch (hw->mac.type) {
2997         case e1000_82575:
2998                 pba = E1000_PBA_32K;
2999                 break;
3000         case e1000_82576:
3001         case e1000_vfadapt:
3002                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3003                 pba &= E1000_RXPBS_SIZE_MASK_82576;
3004                 break;
3005         case e1000_82580:
3006         case e1000_i350:
3007         case e1000_i354:
3008         case e1000_vfadapt_i350:
3009                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3010                 pba = e1000_rxpbs_adjust_82580(pba);
3011                 break;
3012         case e1000_i210:
3013         case e1000_i211:
3014                 pba = E1000_PBA_34K;
3015                 break;
3016         default:
3016                 break;
3017         }
3018
3019         /* Special needs in case of Jumbo frames */
3020         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3021                 u32 tx_space, min_tx, min_rx;
3022                 pba = E1000_READ_REG(hw, E1000_PBA);
3023                 tx_space = pba >> 16;
3024                 pba &= 0xffff;
3025                 min_tx = (adapter->max_frame_size +
3026                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3027                 min_tx = roundup2(min_tx, 1024);
3028                 min_tx >>= 10;
3029                 min_rx = adapter->max_frame_size;
3030                 min_rx = roundup2(min_rx, 1024);
3031                 min_rx >>= 10;
3032                 if (tx_space < min_tx &&
3033                     ((min_tx - tx_space) < pba)) {
3034                         pba = pba - (min_tx - tx_space);
3035                         /*
3036                          * if short on rx space, rx wins
3037                          * and must trump tx adjustment
3038                          */
3039                         if (pba < min_rx)
3040                                 pba = min_rx;
3041                 }
3042                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3043         }
3044
3045         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3046
3047         /*
3048          * These parameters control the automatic generation (Tx) and
3049          * response (Rx) to Ethernet PAUSE frames.
3050          * - High water mark should allow for at least two frames to be
3051          *   received after sending an XOFF.
3052          * - Low water mark works best when it is very near the high water mark.
3053          *   This allows the receiver to restart by sending XON when it has
3054          *   drained a bit.
3055          */
3056         hwm = min(((pba << 10) * 9 / 10),
3057             ((pba << 10) - 2 * adapter->max_frame_size));
3058
3059         if (hw->mac.type < e1000_82576) {
3060                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3061                 fc->low_water = fc->high_water - 8;
3062         } else {
3063                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3064                 fc->low_water = fc->high_water - 16;
3065         }
3066
3067         fc->pause_time = IGB_FC_PAUSE_TIME;
3068         fc->send_xon = TRUE;
3069         if (adapter->fc)
3070                 fc->requested_mode = adapter->fc;
3071         else
3072                 fc->requested_mode = e1000_fc_default;
3073
3074         /* Issue a global reset */
3075         e1000_reset_hw(hw);
3076         E1000_WRITE_REG(hw, E1000_WUC, 0);
3077
3078         /* Reset for AutoMediaDetect */
3079         if (adapter->flags & IGB_MEDIA_RESET) {
3080                 e1000_setup_init_funcs(hw, TRUE);
3081                 e1000_get_bus_info(hw);
3082                 adapter->flags &= ~IGB_MEDIA_RESET;
3083         }
3084
3085         if (e1000_init_hw(hw) < 0)
3086                 device_printf(dev, "Hardware Initialization Failed\n");
3087
3088         /* Setup DMA Coalescing */
3089         igb_init_dmac(adapter, pba);
3090
3091         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3092         e1000_get_phy_info(hw);
3093         e1000_check_for_link(hw);
3094         return;
3095 }
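
/*
** A worked example of the flow control watermarks computed in
** igb_reset(), assuming an i210 (pba = 34, a 34KB RX buffer)
** and the standard 1518-byte max frame:
**
**	hwm = min(34816 * 9 / 10, 34816 - 2 * 1518)
**	    = min(31334, 31780) = 31334
**	fc->high_water = 31334 & 0xFFF0 = 31328	(16-byte units)
**	fc->low_water  = 31328 - 16     = 31312
**
** leaving roughly two full frames of headroom above the high
** water mark after an XOFF is sent.
*/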
3096
3097 /*********************************************************************
3098  *
3099  *  Setup networking device structure and register an interface.
3100  *
3101  **********************************************************************/
3102 static int
3103 igb_setup_interface(device_t dev, struct adapter *adapter)
3104 {
3105         struct ifnet   *ifp;
3106
3107         INIT_DEBUGOUT("igb_setup_interface: begin");
3108
3109         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3110         if (ifp == NULL) {
3111                 device_printf(dev, "cannot allocate ifnet structure\n");
3112                 return (-1);
3113         }
3114         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3115         ifp->if_init = igb_init;
3116         ifp->if_softc = adapter;
3117         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3118         ifp->if_ioctl = igb_ioctl;
3119 #ifndef IGB_LEGACY_TX
3120         ifp->if_transmit = igb_mq_start;
3121         ifp->if_qflush = igb_qflush;
3122 #else
3123         ifp->if_start = igb_start;
3124         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3125         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3126         IFQ_SET_READY(&ifp->if_snd);
3127 #endif
3128
3129         ether_ifattach(ifp, adapter->hw.mac.addr);
3130
3131         /* Build up the supported capability set */
3132
3133         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3134         ifp->if_capabilities |= IFCAP_TSO;
3135         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3136         ifp->if_capenable = ifp->if_capabilities;
3137
3138         /* Advertise LRO capability, but don't enable it by default */
3139         ifp->if_capabilities |= IFCAP_LRO;
3140
3141 #ifdef DEVICE_POLLING
3142         ifp->if_capabilities |= IFCAP_POLLING;
3143 #endif
3144
3145         /*
3146          * Tell the upper layer(s) we
3147          * support full VLAN capability.
3148          */
3149         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3150         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3151                              |  IFCAP_VLAN_HWTSO
3152                              |  IFCAP_VLAN_MTU;
3153         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3154                           |  IFCAP_VLAN_HWTSO
3155                           |  IFCAP_VLAN_MTU;
3156
3157         /*
3158         ** Don't enable this by default: if vlans are
3159         ** created on another pseudo device (e.g. lagg)
3160         ** then vlan events are not passed through and
3161         ** operation breaks, but it works with HW FILTER
3162         ** off. If you use vlans directly on the igb driver
3163         ** you can enable this for full hardware tag filtering.
3164         */
3165         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3166
3167         /*
3168          * Specify the media types supported by this adapter and register
3169          * callbacks to update media and link information
3170          */
3171         ifmedia_init(&adapter->media, IFM_IMASK,
3172             igb_media_change, igb_media_status);
3173         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3174             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3175                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3176                             0, NULL);
3177                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3178         } else {
3179                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3180                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3181                             0, NULL);
3182                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3183                             0, NULL);
3184                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3185                             0, NULL);
3186                 if (adapter->hw.phy.type != e1000_phy_ife) {
3187                         ifmedia_add(&adapter->media,
3188                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3189                         ifmedia_add(&adapter->media,
3190                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3191                 }
3192         }
3193         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3194         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3195         return (0);
3196 }
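
/*
** Note: since IFCAP_VLAN_HWFILTER is only advertised above (not
** enabled), a user running vlans directly on igb would typically
** turn it on from userland before creating the vlan, e.g. (the
** interface name and vlan tag are placeholders):
**
**	ifconfig igb0 vlanhwfilter
**	ifconfig igb0.42 create
*/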
3197
3198
3199 /*
3200  * Manage DMA'able memory.
3201  */
3202 static void
3203 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3204 {
3205         if (error)
3206                 return;
3207         *(bus_addr_t *) arg = segs[0].ds_addr;
3208 }
3209
3210 static int
3211 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3212         struct igb_dma_alloc *dma, int mapflags)
3213 {
3214         int error;
3215
3216         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3217                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3218                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3219                                 BUS_SPACE_MAXADDR,      /* highaddr */
3220                                 NULL, NULL,             /* filter, filterarg */
3221                                 size,                   /* maxsize */
3222                                 1,                      /* nsegments */
3223                                 size,                   /* maxsegsize */
3224                                 0,                      /* flags */
3225                                 NULL,                   /* lockfunc */
3226                                 NULL,                   /* lockarg */
3227                                 &dma->dma_tag);
3228         if (error) {
3229                 device_printf(adapter->dev,
3230                     "%s: bus_dma_tag_create failed: %d\n",
3231                     __func__, error);
3232                 goto fail_0;
3233         }
3234
3235         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3236             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3237         if (error) {
3238                 device_printf(adapter->dev,
3239                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3240                     __func__, (uintmax_t)size, error);
3241                 goto fail_1;
3242         }
3243
3244         dma->dma_paddr = 0;
3245         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3246             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3247         if (error || dma->dma_paddr == 0) {
3248                 device_printf(adapter->dev,
3249                     "%s: bus_dmamap_load failed: %d\n",
3250                     __func__, error);
3251                 goto fail_3;
3252         }
3253
3254         return (0);
3255
3256 fail_3:
3257         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3258         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3259 fail_1:
3260         bus_dma_tag_destroy(dma->dma_tag);
3261 fail_0:
3262         dma->dma_map = NULL;
3263         dma->dma_tag = NULL;
3264
3265         return (error);
3266 }
3267
3268 static void
3269 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3270 {
3271         if (dma->dma_tag == NULL)
3272                 return;
3273         if (dma->dma_map != NULL) {
3274                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3275                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3276                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3277                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3278                 dma->dma_map = NULL;
3279         }
3280         bus_dma_tag_destroy(dma->dma_tag);
3281         dma->dma_tag = NULL;
3282 }
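
/*
** A minimal usage sketch of the helper pair above ("ring" and
** the 4KB size are hypothetical, not part of the driver):
**
**	struct igb_dma_alloc ring;
**
**	if (igb_dma_malloc(adapter, 4096, &ring, BUS_DMA_NOWAIT))
**		return (ENOMEM);
**	... ring.dma_vaddr is the KVA for CPU access, and
**	... ring.dma_paddr the bus address for the hardware
**	... (stored by igb_dmamap_cb), then eventually:
**	igb_dma_free(adapter, &ring);
*/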
3283
3284
3285 /*********************************************************************
3286  *
3287  *  Allocate memory for the transmit and receive rings, and then
3288  *  the descriptors associated with each, called only once at attach.
3289  *
3290  **********************************************************************/
3291 static int
3292 igb_allocate_queues(struct adapter *adapter)
3293 {
3294         device_t dev = adapter->dev;
3295         struct igb_queue        *que = NULL;
3296         struct tx_ring          *txr = NULL;
3297         struct rx_ring          *rxr = NULL;
3298         int rsize, tsize, error = E1000_SUCCESS;
3299         int txconf = 0, rxconf = 0;
3300
3301         /* First allocate the top level queue structs */
3302         if (!(adapter->queues =
3303             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3304             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3305                 device_printf(dev, "Unable to allocate queue memory\n");
3306                 error = ENOMEM;
3307                 goto fail;
3308         }
3309
3310         /* Next allocate the TX ring struct memory */
3311         if (!(adapter->tx_rings =
3312             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3313             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3314                 device_printf(dev, "Unable to allocate TX ring memory\n");
3315                 error = ENOMEM;
3316                 goto tx_fail;
3317         }
3318
3319         /* Now allocate the RX */
3320         if (!(adapter->rx_rings =
3321             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3322             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3323                 device_printf(dev, "Unable to allocate RX ring memory\n");
3324                 error = ENOMEM;
3325                 goto rx_fail;
3326         }
3327
3328         tsize = roundup2(adapter->num_tx_desc *
3329             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3330         /*
3331          * Now set up the TX queues, txconf is needed to handle the
3332          * possibility that things fail midcourse and we need to
3333          * undo memory gracefully
3334          */ 
3335         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3336                 /* Set up some basics */
3337                 txr = &adapter->tx_rings[i];
3338                 txr->adapter = adapter;
3339                 txr->me = i;
3340                 txr->num_desc = adapter->num_tx_desc;
3341
3342                 /* Initialize the TX lock */
3343                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3344                     device_get_nameunit(dev), txr->me);
3345                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3346
3347                 if (igb_dma_malloc(adapter, tsize,
3348                         &txr->txdma, BUS_DMA_NOWAIT)) {
3349                         device_printf(dev,
3350                             "Unable to allocate TX Descriptor memory\n");
3351                         error = ENOMEM;
3352                         goto err_tx_desc;
3353                 }
3354                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3355                 bzero((void *)txr->tx_base, tsize);
3356
3357                 /* Now allocate transmit buffers for the ring */
3358                 if (igb_allocate_transmit_buffers(txr)) {
3359                         device_printf(dev,
3360                             "Critical Failure setting up transmit buffers\n");
3361                         error = ENOMEM;
3362                         goto err_tx_desc;
3363                 }
3364 #ifndef IGB_LEGACY_TX
3365                 /* Allocate a buf ring */
3366                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3367                     M_WAITOK, &txr->tx_mtx);
3368 #endif
3369         }
3370
3371         /*
3372          * Next the RX queues...
3373          */ 
3374         rsize = roundup2(adapter->num_rx_desc *
3375             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3376         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3377                 rxr = &adapter->rx_rings[i];
3378                 rxr->adapter = adapter;
3379                 rxr->me = i;
3380
3381                 /* Initialize the RX lock */
3382                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3383                     device_get_nameunit(dev), rxr->me);
3384                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3385
3386                 if (igb_dma_malloc(adapter, rsize,
3387                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3388                         device_printf(dev,
3389                             "Unable to allocate RX Descriptor memory\n");
3390                         error = ENOMEM;
3391                         goto err_rx_desc;
3392                 }
3393                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3394                 bzero((void *)rxr->rx_base, rsize);
3395
3396                 /* Allocate receive buffers for the ring */
3397                 if (igb_allocate_receive_buffers(rxr)) {
3398                         device_printf(dev,
3399                             "Critical Failure setting up receive buffers\n");
3400                         error = ENOMEM;
3401                         goto err_rx_desc;
3402                 }
3403         }
3404
3405         /*
3406         ** Finally set up the queue holding structs
3407         */
3408         for (int i = 0; i < adapter->num_queues; i++) {
3409                 que = &adapter->queues[i];
3410                 que->adapter = adapter;
3411                 que->txr = &adapter->tx_rings[i];
3412                 que->rxr = &adapter->rx_rings[i];
3413         }
3414
3415         return (0);
3416
3417 err_rx_desc:
3418         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3419                 igb_dma_free(adapter, &rxr->rxdma);
3420 err_tx_desc:
3421         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
3422 #ifndef IGB_LEGACY_TX
3423                 buf_ring_free(txr->br, M_DEVBUF);
3424 #endif
3425                 igb_dma_free(adapter, &txr->txdma);
3426         }
3427         free(adapter->rx_rings, M_DEVBUF);
3428 rx_fail:
3429         free(adapter->tx_rings, M_DEVBUF);
3429 tx_fail:
3430         free(adapter->queues, M_DEVBUF);
3431 fail:
3432         return (error);
3433 }
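
/*
** Sizing example for the rings above, assuming the default of
** 1024 descriptors per ring: both advanced descriptor unions
** are 16 bytes, so
**
**	tsize = rsize = roundup2(1024 * 16, IGB_DBA_ALIGN) = 16384
**
** i.e. each ring gets a 16KB descriptor area (roundup2 is a
** no-op here since 16384 is already a multiple of the
** alignment).
*/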
3434
3435 /*********************************************************************
3436  *
3437  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3438  *  the information needed to transmit a packet on the wire. This is
3439  *  called only once at attach, setup is done every reset.
3440  *
3441  **********************************************************************/
3442 static int
3443 igb_allocate_transmit_buffers(struct tx_ring *txr)
3444 {
3445         struct adapter *adapter = txr->adapter;
3446         device_t dev = adapter->dev;
3447         struct igb_tx_buf *txbuf;
3448         int error, i;
3449
3450         /*
3451          * Setup DMA descriptor areas.
3452          */
3453         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3454                                1, 0,                    /* alignment, bounds */
3455                                BUS_SPACE_MAXADDR,       /* lowaddr */
3456                                BUS_SPACE_MAXADDR,       /* highaddr */
3457                                NULL, NULL,              /* filter, filterarg */
3458                                IGB_TSO_SIZE,            /* maxsize */
3459                                IGB_MAX_SCATTER,         /* nsegments */
3460                                PAGE_SIZE,               /* maxsegsize */
3461                                0,                       /* flags */
3462                                NULL,                    /* lockfunc */
3463                                NULL,                    /* lockfuncarg */
3464                                &txr->txtag))) {
3465                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3466                 goto fail;
3467         }
3468
3469         if (!(txr->tx_buffers =
3470             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3471             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3472                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3473                 error = ENOMEM;
3474                 goto fail;
3475         }
3476
3477         /* Create the descriptor buffer dma maps */
3478         txbuf = txr->tx_buffers;
3479         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3480                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3481                 if (error != 0) {
3482                         device_printf(dev, "Unable to create TX DMA map\n");
3483                         goto fail;
3484                 }
3485         }
3486
3487         return (0);
3488 fail:
3489         /* We free all, it handles case where we are in the middle */
3490         igb_free_transmit_structures(adapter);
3491         return (error);
3492 }
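
/*
** The tag created above bounds what a single transmit mbuf
** chain may look like once mapped: at most IGB_MAX_SCATTER
** segments, each no larger than PAGE_SIZE, totalling at most
** IGB_TSO_SIZE bytes -- i.e. one map per tx_buffer is enough
** to cover a maximally sized TSO burst.
*/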
3493
3494 /*********************************************************************
3495  *
3496  *  Initialize a transmit ring.
3497  *
3498  **********************************************************************/
3499 static void
3500 igb_setup_transmit_ring(struct tx_ring *txr)
3501 {
3502         struct adapter *adapter = txr->adapter;
3503         struct igb_tx_buf *txbuf;
3504         int i;
3505 #ifdef DEV_NETMAP
3506         struct netmap_adapter *na = NA(adapter->ifp);
3507         struct netmap_slot *slot;
3508 #endif /* DEV_NETMAP */
3509
3510         /* Clear the old descriptor contents */
3511         IGB_TX_LOCK(txr);
3512 #ifdef DEV_NETMAP
3513         slot = netmap_reset(na, NR_TX, txr->me, 0);
3514 #endif /* DEV_NETMAP */
3515         bzero((void *)txr->tx_base,
3516               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3517         /* Reset indices */
3518         txr->next_avail_desc = 0;
3519         txr->next_to_clean = 0;
3520
3521         /* Free any existing tx buffers. */
3522         txbuf = txr->tx_buffers;
3523         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3524                 if (txbuf->m_head != NULL) {
3525                         bus_dmamap_sync(txr->txtag, txbuf->map,
3526                             BUS_DMASYNC_POSTWRITE);
3527                         bus_dmamap_unload(txr->txtag, txbuf->map);
3528                         m_freem(txbuf->m_head);
3529                         txbuf->m_head = NULL;
3530                 }
3531 #ifdef DEV_NETMAP
3532                 if (slot) {
3533                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3534                         /* no need to set the address */
3535                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3536                 }
3537 #endif /* DEV_NETMAP */
3538                 /* clear the watch index */
3539                 txbuf->eop = NULL;
3540         }
3541
3542         /* Set number of descriptors available */
3543         txr->tx_avail = adapter->num_tx_desc;
3544
3545         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3546             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3547         IGB_TX_UNLOCK(txr);
3548 }
3549
3550 /*********************************************************************
3551  *
3552  *  Initialize all transmit rings.
3553  *
3554  **********************************************************************/
3555 static void
3556 igb_setup_transmit_structures(struct adapter *adapter)
3557 {
3558         struct tx_ring *txr = adapter->tx_rings;
3559
3560         for (int i = 0; i < adapter->num_queues; i++, txr++)
3561                 igb_setup_transmit_ring(txr);
3562
3563         return;
3564 }
3565
3566 /*********************************************************************
3567  *
3568  *  Enable transmit unit.
3569  *
3570  **********************************************************************/
3571 static void
3572 igb_initialize_transmit_units(struct adapter *adapter)
3573 {
3574         struct tx_ring  *txr = adapter->tx_rings;
3575         struct e1000_hw *hw = &adapter->hw;
3576         u32             tctl, txdctl;
3577
3578         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3579         tctl = txdctl = 0;
3580
3581         /* Setup the Tx Descriptor Rings */
3582         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3583                 u64 bus_addr = txr->txdma.dma_paddr;
3584
3585                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3586                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3587                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3588                     (uint32_t)(bus_addr >> 32));
3589                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3590                     (uint32_t)bus_addr);
3591
3592                 /* Setup the HW Tx Head and Tail descriptor pointers */
3593                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3594                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3595
3596                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3597                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3598                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3599
3600                 txr->queue_status = IGB_QUEUE_IDLE;
3601
3602                 txdctl |= IGB_TX_PTHRESH;
3603                 txdctl |= IGB_TX_HTHRESH << 8;
3604                 txdctl |= IGB_TX_WTHRESH << 16;
3605                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3606                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3607         }
3608
3609         if (adapter->vf_ifp)
3610                 return;
3611
3612         e1000_config_collision_dist(hw);
3613
3614         /* Program the Transmit Control Register */
3615         tctl = E1000_READ_REG(hw, E1000_TCTL);
3616         tctl &= ~E1000_TCTL_CT;
3617         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3618                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3619
3620         /* This write will effectively turn on the transmit unit. */
3621         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3622 }
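
/*
** Register example for one of the rings above, for a
** hypothetical descriptor area at bus address 0x123456000:
**
**	TDBAH = bus_addr >> 32      = 0x00000001
**	TDBAL = (uint32_t)bus_addr  = 0x23456000
**	TDLEN = num_tx_desc * 16    (bytes of descriptors)
**	TDH = TDT = 0               (ring starts empty)
**
** TXDCTL then packs the prefetch, host and writeback thresholds
** at bit offsets 0, 8 and 16 before setting
** E1000_TXDCTL_QUEUE_ENABLE.
*/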
3623
3624 /*********************************************************************
3625  *
3626  *  Free all transmit rings.
3627  *
3628  **********************************************************************/
3629 static void
3630 igb_free_transmit_structures(struct adapter *adapter)
3631 {
3632         struct tx_ring *txr = adapter->tx_rings;
3633
3634         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3635                 IGB_TX_LOCK(txr);
3636                 igb_free_transmit_buffers(txr);
3637                 igb_dma_free(adapter, &txr->txdma);
3638                 IGB_TX_UNLOCK(txr);
3639                 IGB_TX_LOCK_DESTROY(txr);
3640         }
3641         free(adapter->tx_rings, M_DEVBUF);
3642 }
3643
3644 /*********************************************************************
3645  *
3646  *  Free transmit ring related data structures.
3647  *
3648  **********************************************************************/
3649 static void
3650 igb_free_transmit_buffers(struct tx_ring *txr)
3651 {
3652         struct adapter *adapter = txr->adapter;
3653         struct igb_tx_buf *tx_buffer;
3654         int             i;
3655
3656         INIT_DEBUGOUT("free_transmit_ring: begin");
3657
3658         if (txr->tx_buffers == NULL)
3659                 return;
3660
3661         tx_buffer = txr->tx_buffers;
3662         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3663                 if (tx_buffer->m_head != NULL) {
3664                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3665                             BUS_DMASYNC_POSTWRITE);
3666                         bus_dmamap_unload(txr->txtag,
3667                             tx_buffer->map);
3668                         m_freem(tx_buffer->m_head);
3669                         tx_buffer->m_head = NULL;
3670                         if (tx_buffer->map != NULL) {
3671                                 bus_dmamap_destroy(txr->txtag,
3672                                     tx_buffer->map);
3673                                 tx_buffer->map = NULL;
3674                         }
3675                 } else if (tx_buffer->map != NULL) {
3676                         bus_dmamap_unload(txr->txtag,
3677                             tx_buffer->map);
3678                         bus_dmamap_destroy(txr->txtag,
3679                             tx_buffer->map);
3680                         tx_buffer->map = NULL;
3681                 }
3682         }
3683 #ifndef IGB_LEGACY_TX
3684         if (txr->br != NULL)
3685                 buf_ring_free(txr->br, M_DEVBUF);
3686 #endif
3687         if (txr->tx_buffers != NULL) {
3688                 free(txr->tx_buffers, M_DEVBUF);
3689                 txr->tx_buffers = NULL;
3690         }
3691         if (txr->txtag != NULL) {
3692                 bus_dma_tag_destroy(txr->txtag);
3693                 txr->txtag = NULL;
3694         }
3695         return;
3696 }
3697
3698 /**********************************************************************
3699  *
3700  *  Setup work for hardware segmentation offload (TSO) on
3701  *  adapters using advanced tx descriptors
3702  *
3703  **********************************************************************/
3704 static int
3705 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3706     u32 *cmd_type_len, u32 *olinfo_status)
3707 {
3708         struct adapter *adapter = txr->adapter;
3709         struct e1000_adv_tx_context_desc *TXD;
3710         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3711         u32 mss_l4len_idx = 0, paylen;
3712         u16 vtag = 0, eh_type;
3713         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3714         struct ether_vlan_header *eh;
3715 #ifdef INET6
3716         struct ip6_hdr *ip6;
3717 #endif
3718 #ifdef INET
3719         struct ip *ip;
3720 #endif
3721         struct tcphdr *th;
3722
3723
3724         /*
3725          * Determine where frame payload starts.
3726          * Jump over vlan headers if already present
3727          */
3728         eh = mtod(mp, struct ether_vlan_header *);
3729         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3730                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3731                 eh_type = eh->evl_proto;
3732         } else {
3733                 ehdrlen = ETHER_HDR_LEN;
3734                 eh_type = eh->evl_encap_proto;
3735         }
3736
3737         switch (ntohs(eh_type)) {
3738 #ifdef INET6
3739         case ETHERTYPE_IPV6:
3740                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3741                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3742                 if (ip6->ip6_nxt != IPPROTO_TCP)
3743                         return (ENXIO);
3744                 ip_hlen = sizeof(struct ip6_hdr);
3746                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3747                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3748                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3749                 break;
3750 #endif
3751 #ifdef INET
3752         case ETHERTYPE_IP:
3753                 ip = (struct ip *)(mp->m_data + ehdrlen);
3754                 if (ip->ip_p != IPPROTO_TCP)
3755                         return (ENXIO);
3756                 ip->ip_sum = 0;
3757                 ip_hlen = ip->ip_hl << 2;
3758                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3759                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3760                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3761                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3762                 /* Tell transmit desc to also do IPv4 checksum. */
3763                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3764                 break;
3765 #endif
3766         default:
3767                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3768                     __func__, ntohs(eh_type));
3769                 break;
3770         }
3771
3772         ctxd = txr->next_avail_desc;
3773         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3774
3775         tcp_hlen = th->th_off << 2;
3776
3777         /* This is used in the transmit desc in encap */
3778         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3779
3780         /* VLAN MACLEN IPLEN */
3781         if (mp->m_flags & M_VLANTAG) {
3782                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3783                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3784         }
3785
3786         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3787         vlan_macip_lens |= ip_hlen;
3788         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3789
3790         /* ADV DTYPE TUCMD */
3791         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3792         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3793         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3794
3795         /* MSS L4LEN IDX */
3796         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3797         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3798         /* 82575 needs the queue index added */
3799         if (adapter->hw.mac.type == e1000_82575)
3800                 mss_l4len_idx |= txr->me << 4;
3801         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3802
3803         TXD->seqnum_seed = htole32(0);
3804
3805         if (++ctxd == txr->num_desc)
3806                 ctxd = 0;
3807
3808         txr->tx_avail--;
3809         txr->next_avail_desc = ctxd;
3810         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3811         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3812         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3813         ++txr->tso_tx;
3814         return (0);
3815 }
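
/*
** A worked example of the TSO context above (hypothetical
** frame): an untagged IPv4 send of 32768 payload bytes with
** 20-byte IP and TCP headers gives
**
**	ehdrlen = 14, ip_hlen = 20, tcp_hlen = 20
**	paylen  = (32768 + 54) - 14 - 20 - 20 = 32768
**	vlan_macip_lens = (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20
**	mss_l4len_idx   = (tso_segsz << E1000_ADVTXD_MSS_SHIFT)
**			| (20 << E1000_ADVTXD_L4LEN_SHIFT)
**
** telling the hardware where the headers end and at what MSS
** to segment the payload.
*/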
3816
3817 /*********************************************************************
3818  *
3819  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3820  *
3821  **********************************************************************/
3822
3823 static int
3824 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3825     u32 *cmd_type_len, u32 *olinfo_status)
3826 {
3827         struct e1000_adv_tx_context_desc *TXD;
3828         struct adapter *adapter = txr->adapter;
3829         struct ether_vlan_header *eh;
3830         struct ip *ip;
3831         struct ip6_hdr *ip6;
3832         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3833         int     ehdrlen, ip_hlen = 0;
3834         u16     etype;
3835         u8      ipproto = 0;
3836         int     offload = TRUE;
3837         int     ctxd = txr->next_avail_desc;
3838         u16     vtag = 0;
3839
3840         /* First check if TSO is to be used */
3841         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3842                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3843
3844         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3845                 offload = FALSE;
3846
3847         /* Indicate the whole packet as payload when not doing TSO */
3848         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3849
3850         /* Now ready a context descriptor */
3851         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3852
3853         /*
3854         ** In advanced descriptors the vlan tag must 
3855         ** be placed into the context descriptor. Hence
3856         ** we need to make one even if not doing offloads.
3857         */
3858         if (mp->m_flags & M_VLANTAG) {
3859                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3860                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3861         } else if (offload == FALSE) /* ... no offload to do */
3862                 return (0);
3863
3864         /*
3865          * Determine where frame payload starts.
3866          * Jump over vlan headers if already present,
3867          * helpful for QinQ too.
3868          */
3869         eh = mtod(mp, struct ether_vlan_header *);
3870         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3871                 etype = ntohs(eh->evl_proto);
3872                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3873         } else {
3874                 etype = ntohs(eh->evl_encap_proto);
3875                 ehdrlen = ETHER_HDR_LEN;
3876         }
3877
3878         /* Set the ether header length */
3879         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3880
3881         switch (etype) {
3882                 case ETHERTYPE_IP:
3883                         ip = (struct ip *)(mp->m_data + ehdrlen);
3884                         ip_hlen = ip->ip_hl << 2;
3885                         ipproto = ip->ip_p;
3886                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3887                         break;
3888                 case ETHERTYPE_IPV6:
3889                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3890                         ip_hlen = sizeof(struct ip6_hdr);
3891                         /* XXX-BZ this will go badly in case of ext hdrs. */
3892                         ipproto = ip6->ip6_nxt;
3893                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3894                         break;
3895                 default:
3896                         offload = FALSE;
3897                         break;
3898         }
3899
3900         vlan_macip_lens |= ip_hlen;
3901         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3902
3903         switch (ipproto) {
3904                 case IPPROTO_TCP:
3905                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3906                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3907                         break;
3908                 case IPPROTO_UDP:
3909                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3910                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3911                         break;
3912
3913 #if __FreeBSD_version >= 800000
3914                 case IPPROTO_SCTP:
3915                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3916                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3917                         break;
3918 #endif
3919                 default:
3920                         offload = FALSE;
3921                         break;
3922         }
3923
3924         if (offload) /* For the TX descriptor setup */
3925                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3926
3927         /* 82575 needs the queue index added */
3928         if (adapter->hw.mac.type == e1000_82575)
3929                 mss_l4len_idx = txr->me << 4;
3930
3931         /* Now copy bits into descriptor */
3932         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3933         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3934         TXD->seqnum_seed = htole32(0);
3935         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3936
3937         /* We've consumed the first desc, adjust counters */
3938         if (++ctxd == txr->num_desc)
3939                 ctxd = 0;
3940         txr->next_avail_desc = ctxd;
3941         --txr->tx_avail;
3942
3943         return (0);
3944 }
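
/*
** For comparison with the TSO path: a plain UDP/IPv4 packet
** with CSUM_UDP set would leave this function with
** (abbreviated field names)
**
**	vlan_macip_lens = (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20
**	type_tucmd_mlhl = DCMD_DEXT | DTYP_CTXT | TUCMD_IPV4 | L4T_UDP
**	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8
**
** and the hardware inserts the UDP checksum based on this
** context when the data descriptors are sent.
*/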
3945
3946 /**********************************************************************
3947  *
3948  *  Examine each tx_buffer in the used queue. If the hardware is done
3949  *  processing the packet then free associated resources. The
3950  *  tx_buffer is put back on the free queue.
3951  *
3952  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3953  **********************************************************************/
3954 static bool
3955 igb_txeof(struct tx_ring *txr)
3956 {
3957         struct adapter          *adapter = txr->adapter;
3958         struct ifnet            *ifp = adapter->ifp;
3959         u32                     work, processed = 0;
3960         u16                     limit = txr->process_limit;
3961         struct igb_tx_buf       *buf;
3962         union e1000_adv_tx_desc *txd;
3963
3964         mtx_assert(&txr->tx_mtx, MA_OWNED);
3965
3966 #ifdef DEV_NETMAP
3967         if (netmap_tx_irq(ifp, txr->me))
3968                 return (FALSE);
3969 #endif /* DEV_NETMAP */
3970
3971         if (txr->tx_avail == txr->num_desc) {
3972                 txr->queue_status = IGB_QUEUE_IDLE;
3973                 return FALSE;
3974         }
3975
3976         /* Get work starting point */
3977         work = txr->next_to_clean;
3978         buf = &txr->tx_buffers[work];
3979         txd = &txr->tx_base[work];
3980         work -= txr->num_desc; /* The distance to ring end */
3981         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3982             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3983         do {
3984                 union e1000_adv_tx_desc *eop = buf->eop;
3985                 if (eop == NULL) /* No work */
3986                         break;
3987
3988                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3989                         break;  /* I/O not complete */
3990
3991                 if (buf->m_head) {
3992                         txr->bytes +=
3993                             buf->m_head->m_pkthdr.len;
3994                         bus_dmamap_sync(txr->txtag,
3995                             buf->map,
3996                             BUS_DMASYNC_POSTWRITE);
3997                         bus_dmamap_unload(txr->txtag,
3998                             buf->map);
3999                         m_freem(buf->m_head);
4000                         buf->m_head = NULL;
4001                 }
4002                 buf->eop = NULL;
4003                 ++txr->tx_avail;
4004
4005                 /* We clean the range if multi segment */
4006                 while (txd != eop) {
4007                         ++txd;
4008                         ++buf;
4009                         ++work;
4010                         /* wrap the ring? */
4011                         if (__predict_false(!work)) {
4012                                 work -= txr->num_desc;
4013                                 buf = txr->tx_buffers;
4014                                 txd = txr->tx_base;
4015                         }
4016                         if (buf->m_head) {
4017                                 txr->bytes +=
4018                                     buf->m_head->m_pkthdr.len;
4019                                 bus_dmamap_sync(txr->txtag,
4020                                     buf->map,
4021                                     BUS_DMASYNC_POSTWRITE);
4022                                 bus_dmamap_unload(txr->txtag,
4023                                     buf->map);
4024                                 m_freem(buf->m_head);
4025                                 buf->m_head = NULL;
4026                         }
4027                         ++txr->tx_avail;
4028                         buf->eop = NULL;
4029
4030                 }
4031                 ++txr->packets;
4032                 ++processed;
4033                 ++ifp->if_opackets;
4034                 txr->watchdog_time = ticks;
4035
4036                 /* Try the next packet */
4037                 ++txd;
4038                 ++buf;
4039                 ++work;
4040                 /* reset with a wrap */
4041                 if (__predict_false(!work)) {
4042                         work -= txr->num_desc;
4043                         buf = txr->tx_buffers;
4044                         txd = txr->tx_base;
4045                 }
4046                 prefetch(txd);
4047         } while (__predict_true(--limit));
4048
4049         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4050             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4051
4052         work += txr->num_desc;
4053         txr->next_to_clean = work;
4054
4055         /*
4056         ** Watchdog calculation: we know there is
4057         ** work outstanding or the first return above
4058         ** would have been taken, so if nothing was
4059         ** processed for too long it indicates a hang.
4060         */
4061         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4062                 txr->queue_status |= IGB_QUEUE_HUNG;
4063
4064         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4065                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4066
4067         if (txr->tx_avail == txr->num_desc) {
4068                 txr->queue_status = IGB_QUEUE_IDLE;
4069                 return (FALSE);
4070         }
4071
4072         return (TRUE);
4073 }
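
/*
** The negative "work" index above is a wrap trick. Example with
** num_desc = 1024 and next_to_clean = 1000: work starts at
** 1000 - 1024 = -24, so when ++work reaches 0 descriptor 1023
** was just consumed and buf/txd are reset to the ring base
** (and work to -1024). Adding num_desc back at the end turns
** work into the new ring index for next_to_clean.
*/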
4074
4075 /*********************************************************************
4076  *
4077  *  Refresh mbuf buffers for RX descriptor rings
4078  *   - now keeps its own state so discards due to resource
4079  *     exhaustion are unnecessary, if an mbuf cannot be obtained
4080  *     it just returns, keeping its placeholder, thus it can simply
4081  *     be recalled to try again.
4082  *
4083  **********************************************************************/
4084 static void
4085 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4086 {
4087         struct adapter          *adapter = rxr->adapter;
4088         bus_dma_segment_t       hseg[1];
4089         bus_dma_segment_t       pseg[1];
4090         struct igb_rx_buf       *rxbuf;
4091         struct mbuf             *mh, *mp;
4092         int                     i, j, nsegs, error;
4093         bool                    refreshed = FALSE;
4094
4095         i = j = rxr->next_to_refresh;
4096         /*
4097         ** Get one descriptor beyond
4098         ** our work mark to control
4099         ** the loop.
4100         */
4101         if (++j == adapter->num_rx_desc)
4102                 j = 0;
4103
4104         while (j != limit) {
4105                 rxbuf = &rxr->rx_buffers[i];
4106                 /* No hdr mbuf used with header split off */
4107                 if (rxr->hdr_split == FALSE)
4108                         goto no_split;
4109                 if (rxbuf->m_head == NULL) {
4110                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4111                         if (mh == NULL)
4112                                 goto update;
4113                 } else
4114                         mh = rxbuf->m_head;
4115
4116                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4118                 mh->m_flags |= M_PKTHDR;
4119                 /* Get the memory mapping */
4120                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4121                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4122                 if (error != 0) {
4123                         printf("Refresh mbufs: hdr dmamap load"
4124                             " failure - %d\n", error);
4125                         m_free(mh);
4126                         rxbuf->m_head = NULL;
4127                         goto update;
4128                 }
4129                 rxbuf->m_head = mh;
4130                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4131                     BUS_DMASYNC_PREREAD);
4132                 rxr->rx_base[i].read.hdr_addr =
4133                     htole64(hseg[0].ds_addr);
4134 no_split:
4135                 if (rxbuf->m_pack == NULL) {
4136                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4137                             M_PKTHDR, adapter->rx_mbuf_sz);
4138                         if (mp == NULL)
4139                                 goto update;
4140                 } else
4141                         mp = rxbuf->m_pack;
4142
4143                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4144                 /* Get the memory mapping */
4145                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4146                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4147                 if (error != 0) {
4148                         printf("Refresh mbufs: payload dmamap load"
4149                             " failure - %d\n", error);
4150                         m_free(mp);
4151                         rxbuf->m_pack = NULL;
4152                         goto update;
4153                 }
4154                 rxbuf->m_pack = mp;
4155                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4156                     BUS_DMASYNC_PREREAD);
4157                 rxr->rx_base[i].read.pkt_addr =
4158                     htole64(pseg[0].ds_addr);
4159                 refreshed = TRUE; /* I feel wefreshed :) */
4160
4161                 i = j; /* our next is precalculated */
4162                 rxr->next_to_refresh = i;
4163                 if (++j == adapter->num_rx_desc)
4164                         j = 0;
4165         }
4166 update:
4167         if (refreshed) /* update tail */
4168                 E1000_WRITE_REG(&adapter->hw,
4169                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4170         return;
4171 }
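
/*
** Example of the one-beyond "j" above: with num_rx_desc = 1024,
** next_to_refresh = 1020 and limit = 4, the loop refills
** descriptors 1020-1023 and 0-3, then exits when j reaches 4,
** leaving next_to_refresh = 3 -- always one descriptor short of
** the limit, which is the next one the hardware may own.
*/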
4172
4173
4174 /*********************************************************************
4175  *
4176  *  Allocate memory for rx_buffer structures. Since we use one
4177  *  rx_buffer per received packet, the maximum number of rx_buffer's
4178  *  that we'll need is equal to the number of receive descriptors
4179  *  that we've allocated.
4180  *
4181  **********************************************************************/
4182 static int
4183 igb_allocate_receive_buffers(struct rx_ring *rxr)
4184 {
4185         struct  adapter         *adapter = rxr->adapter;
4186         device_t                dev = adapter->dev;
4187         struct igb_rx_buf       *rxbuf;
4188         int                     i, bsize, error;
4189
4190         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4191         if (!(rxr->rx_buffers =
4192             (struct igb_rx_buf *) malloc(bsize,
4193             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4194                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4195                 error = ENOMEM;
4196                 goto fail;
4197         }
4198
4199         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4200                                    1, 0,                /* alignment, bounds */
4201                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4202                                    BUS_SPACE_MAXADDR,   /* highaddr */
4203                                    NULL, NULL,          /* filter, filterarg */
4204                                    MSIZE,               /* maxsize */
4205                                    1,                   /* nsegments */
4206                                    MSIZE,               /* maxsegsize */
4207                                    0,                   /* flags */
4208                                    NULL,                /* lockfunc */
4209                                    NULL,                /* lockfuncarg */
4210                                    &rxr->htag))) {
4211                 device_printf(dev, "Unable to create RX DMA tag\n");
4212                 goto fail;
4213         }
4214
4215         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4216                                    1, 0,                /* alignment, bounds */
4217                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4218                                    BUS_SPACE_MAXADDR,   /* highaddr */
4219                                    NULL, NULL,          /* filter, filterarg */
4220                                    MJUM9BYTES,          /* maxsize */
4221                                    1,                   /* nsegments */
4222                                    MJUM9BYTES,          /* maxsegsize */
4223                                    0,                   /* flags */
4224                                    NULL,                /* lockfunc */
4225                                    NULL,                /* lockfuncarg */
4226                                    &rxr->ptag))) {
4227                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4228                 goto fail;
4229         }
4230
4231         for (i = 0; i < adapter->num_rx_desc; i++) {
4232                 rxbuf = &rxr->rx_buffers[i];
4233                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4234                 if (error) {
4235                         device_printf(dev,
4236                             "Unable to create RX head DMA maps\n");
4237                         goto fail;
4238                 }
4239                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4240                 if (error) {
4241                         device_printf(dev,
4242                             "Unable to create RX packet DMA maps\n");
4243                         goto fail;
4244                 }
4245         }
4246
4247         return (0);
4248
4249 fail:
4250         /* Frees all, but can handle partial completion */
4251         igb_free_receive_structures(adapter);
4252         return (error);
4253 }
4254
4255
4256 static void
4257 igb_free_receive_ring(struct rx_ring *rxr)
4258 {
4259         struct  adapter         *adapter = rxr->adapter;
4260         struct igb_rx_buf       *rxbuf;
4261
4262
4263         for (int i = 0; i < adapter->num_rx_desc; i++) {
4264                 rxbuf = &rxr->rx_buffers[i];
4265                 if (rxbuf->m_head != NULL) {
4266                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4267                             BUS_DMASYNC_POSTREAD);
4268                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4269                         rxbuf->m_head->m_flags |= M_PKTHDR;
4270                         m_freem(rxbuf->m_head);
4271                 }
4272                 if (rxbuf->m_pack != NULL) {
4273                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4274                             BUS_DMASYNC_POSTREAD);
4275                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4276                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4277                         m_freem(rxbuf->m_pack);
4278                 }
4279                 rxbuf->m_head = NULL;
4280                 rxbuf->m_pack = NULL;
4281         }
4282 }
4283
4284
4285 /*********************************************************************
4286  *
4287  *  Initialize a receive ring and its buffers.
4288  *
4289  **********************************************************************/
4290 static int
4291 igb_setup_receive_ring(struct rx_ring *rxr)
4292 {
4293         struct  adapter         *adapter;
4294         struct  ifnet           *ifp;
4295         device_t                dev;
4296         struct igb_rx_buf       *rxbuf;
4297         bus_dma_segment_t       pseg[1], hseg[1];
4298         struct lro_ctrl         *lro = &rxr->lro;
4299         int                     rsize, nsegs, error = 0;
4300 #ifdef DEV_NETMAP
4301         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4302         struct netmap_slot *slot;
4303 #endif /* DEV_NETMAP */
4304
4305         adapter = rxr->adapter;
4306         dev = adapter->dev;
4307         ifp = adapter->ifp;
4308
4309         /* Clear the ring contents */
4310         IGB_RX_LOCK(rxr);
4311 #ifdef DEV_NETMAP
4312         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4313 #endif /* DEV_NETMAP */
4314         rsize = roundup2(adapter->num_rx_desc *
4315             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4316         bzero((void *)rxr->rx_base, rsize);
4317
4318         /*
4319         ** Free current RX buffer structures and their mbufs
4320         */
4321         igb_free_receive_ring(rxr);
4322
4323         /* Configure for header split? */
4324         if (igb_header_split)
4325                 rxr->hdr_split = TRUE;
4326
4327         /* Now replenish the ring mbufs */
4328         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4329                 struct mbuf     *mh, *mp;
4330
4331                 rxbuf = &rxr->rx_buffers[j];
4332 #ifdef DEV_NETMAP
4333                 if (slot) {
4334                         /* slot sj is mapped to the j-th NIC-ring entry */
4335                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4336                         uint64_t paddr;
4337                         void *addr;
4338
4339                         addr = PNMB(slot + sj, &paddr);
4340                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4341                         /* Update descriptor */
4342                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4343                         continue;
4344                 }
4345 #endif /* DEV_NETMAP */
4346                 if (rxr->hdr_split == FALSE)
4347                         goto skip_head;
4348
4349                 /* First the header */
4350                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4351                 if (rxbuf->m_head == NULL) {
4352                         error = ENOBUFS;
4353                         goto fail;
4354                 }
4355                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4356                 mh = rxbuf->m_head;
4357                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4358                 mh->m_flags |= M_PKTHDR;
4359                 /* Get the memory mapping */
4360                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4361                     rxbuf->hmap, rxbuf->m_head, hseg,
4362                     &nsegs, BUS_DMA_NOWAIT);
4363                 if (error != 0) /* Nothing elegant to do here */
4364                         goto fail;
4365                 bus_dmamap_sync(rxr->htag,
4366                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4367                 /* Update descriptor */
4368                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4369
4370 skip_head:
4371                 /* Now the payload cluster */
4372                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4373                     M_PKTHDR, adapter->rx_mbuf_sz);
4374                 if (rxbuf->m_pack == NULL) {
4375                         error = ENOBUFS;
4376                         goto fail;
4377                 }
4378                 mp = rxbuf->m_pack;
4379                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4380                 /* Get the memory mapping */
4381                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4382                     rxbuf->pmap, mp, pseg,
4383                     &nsegs, BUS_DMA_NOWAIT);
4384                 if (error != 0)
4385                         goto fail;
4386                 bus_dmamap_sync(rxr->ptag,
4387                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4388                 /* Update descriptor */
4389                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4390         }
4391
4392         /* Setup our descriptor indices */
4393         rxr->next_to_check = 0;
4394         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4395         rxr->lro_enabled = FALSE;
4396         rxr->rx_split_packets = 0;
4397         rxr->rx_bytes = 0;
4398
4399         rxr->fmp = NULL;
4400         rxr->lmp = NULL;
4401         rxr->discard = FALSE;
4402
4403         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4404             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4405
4406         /*
4407         ** Now set up the LRO interface; we also
4408         ** only do header split when LRO is
4409         ** enabled, since it is rarely desirable
4410         ** otherwise.
4411         */
4412         if (ifp->if_capenable & IFCAP_LRO) {
4413                 error = tcp_lro_init(lro);
4414                 if (error) {
4415                         device_printf(dev, "LRO Initialization failed!\n");
4416                         goto fail;
4417                 }
4418                 INIT_DEBUGOUT("RX LRO Initialized\n");
4419                 rxr->lro_enabled = TRUE;
4420                 lro->ifp = adapter->ifp;
4421         }
4422
4423         IGB_RX_UNLOCK(rxr);
4424         return (0);
4425
4426 fail:
4427         igb_free_receive_ring(rxr);
4428         IGB_RX_UNLOCK(rxr);
4429         return (error);
4430 }
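/*
 * Illustrative sketch (editorial): roundup2() above rounds the ring size up
 * to IGB_DBA_ALIGN; for any power-of-two alignment this reduces to simple
 * mask arithmetic, as in this hypothetical userland check:
 */
#if 0
#include <stdio.h>

#define ROUNDUP2(x, align)	(((x) + ((align) - 1)) & ~((align) - 1))

int
main(void)
{
	/* e.g. 1024 16-byte descriptors, aligned to a hypothetical 128 */
	printf("%d\n", ROUNDUP2(1024 * 16, 128));	/* already aligned */
	printf("%d\n", ROUNDUP2(100, 128));		/* rounds up to 128 */
	return (0);
}
#endif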
4431
4432
4433 /*********************************************************************
4434  *
4435  *  Initialize all receive rings.
4436  *
4437  **********************************************************************/
4438 static int
4439 igb_setup_receive_structures(struct adapter *adapter)
4440 {
4441         struct rx_ring *rxr = adapter->rx_rings;
4442         int i;
4443
4444         for (i = 0; i < adapter->num_queues; i++, rxr++)
4445                 if (igb_setup_receive_ring(rxr))
4446                         goto fail;
4447
4448         return (0);
4449 fail:
4450         /*
4451          * Free RX buffers allocated so far, we will only handle
4452          * the rings that completed, the failing case will have
4453          * cleaned up for itself. 'i' is the endpoint.
4454          */
4455         for (int j = 0; j < i; ++j) {
4456                 rxr = &adapter->rx_rings[j];
4457                 IGB_RX_LOCK(rxr);
4458                 igb_free_receive_ring(rxr);
4459                 IGB_RX_UNLOCK(rxr);
4460         }
4461
4462         return (ENOBUFS);
4463 }
4464
4465 /*********************************************************************
4466  *
4467  *  Enable receive unit.
4468  *
4469  **********************************************************************/
4470 static void
4471 igb_initialize_receive_units(struct adapter *adapter)
4472 {
4473         struct rx_ring  *rxr = adapter->rx_rings;
4474         struct ifnet    *ifp = adapter->ifp;
4475         struct e1000_hw *hw = &adapter->hw;
4476         u32             rctl, rxcsum, psize, srrctl = 0;
4477
4478         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4479
4480         /*
4481          * Make sure receives are disabled while setting
4482          * up the descriptor ring
4483          */
4484         rctl = E1000_READ_REG(hw, E1000_RCTL);
4485         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4486
4487         /*
4488         ** Set up for header split
4489         */
4490         if (igb_header_split) {
4491                 /* Use a standard mbuf for the header */
4492                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4493                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4494         } else
4495                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4496
4497         /*
4498         ** Set up for jumbo frames
4499         */
4500         if (ifp->if_mtu > ETHERMTU) {
4501                 rctl |= E1000_RCTL_LPE;
4502                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4503                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4504                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4505                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4506                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4507                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4508                 }
4509                 /* Set maximum packet len */
4510                 psize = adapter->max_frame_size;
4511                 /* are we on a vlan? */
4512                 if (adapter->ifp->if_vlantrunk != NULL)
4513                         psize += VLAN_TAG_SIZE;
4514                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4515         } else {
4516                 rctl &= ~E1000_RCTL_LPE;
4517                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4518                 rctl |= E1000_RCTL_SZ_2048;
4519         }
4520
4521         /* Setup the Base and Length of the Rx Descriptor Rings */
4522         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4523                 u64 bus_addr = rxr->rxdma.dma_paddr;
4524                 u32 rxdctl;
4525
4526                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4527                     adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc));
4528                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4529                     (uint32_t)(bus_addr >> 32));
4530                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4531                     (uint32_t)bus_addr);
4532                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4533                 /* Enable this Queue */
4534                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4535                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4536                 rxdctl &= 0xFFF00000;
4537                 rxdctl |= IGB_RX_PTHRESH;
4538                 rxdctl |= IGB_RX_HTHRESH << 8;
4539                 rxdctl |= IGB_RX_WTHRESH << 16;
4540                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4541         }
4542
4543         /*
4544         ** Setup for RX MultiQueue
4545         */
4546         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4547         if (adapter->num_queues > 1) {
4548                 u32 random[10], mrqc, shift = 0;
4549                 union igb_reta {
4550                         u32 dword;
4551                         u8  bytes[4];
4552                 } reta;
4553
4554                 arc4rand(&random, sizeof(random), 0);
4555                 if (adapter->hw.mac.type == e1000_82575)
4556                         shift = 6;
4557                 /* Fill the redirection table round-robin */
4558                 for (int i = 0; i < 128; i++) {
4559                         reta.bytes[i & 3] =
4560                             (i % adapter->num_queues) << shift;
4561                         if ((i & 3) == 3)
4562                                 E1000_WRITE_REG(hw,
4563                                     E1000_RETA(i >> 2), reta.dword);
4564                 }
4565                 /* Now fill in hash table */
4566                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4567                 for (int i = 0; i < 10; i++)
4568                         E1000_WRITE_REG_ARRAY(hw,
4569                             E1000_RSSRK(0), i, random[i]);
4570
4571                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4572                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4573                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4574                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4575                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4576                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4577                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4578                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4579
4580                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4581
4582                 /*
4583                 ** NOTE: Receive Full-Packet Checksum Offload
4584                 ** is mutually exclusive with Multiqueue; it is
4585                 ** distinct from TCP/IP checksum offload, which
4586                 ** still works.
4587                 */
4588                 rxcsum |= E1000_RXCSUM_PCSD;
4589 #if __FreeBSD_version >= 800000
4590                 /* For SCTP Offload */
4591                 if ((hw->mac.type == e1000_82576)
4592                     && (ifp->if_capenable & IFCAP_RXCSUM))
4593                         rxcsum |= E1000_RXCSUM_CRCOFL;
4594 #endif
4595         } else {
4596                 /* Non RSS setup */
4597                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4598                         rxcsum |= E1000_RXCSUM_IPPCSE;
4599 #if __FreeBSD_version >= 800000
4600                         if (adapter->hw.mac.type == e1000_82576)
4601                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4602 #endif
4603                 } else
4604                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4605         }
4606         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4607
4608         /* Setup the Receive Control Register */
4609         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4610         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4611                    E1000_RCTL_RDMTS_HALF |
4612                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4613         /* Strip CRC bytes. */
4614         rctl |= E1000_RCTL_SECRC;
4615         /* Make sure VLAN Filters are off */
4616         rctl &= ~E1000_RCTL_VFE;
4617         /* Don't store bad packets */
4618         rctl &= ~E1000_RCTL_SBP;
4619
4620         /* Enable Receives */
4621         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4622
4623         /*
4624          * Setup the HW Rx Head and Tail Descriptor Pointers
4625          *   - needs to be after enable
4626          */
4627         for (int i = 0; i < adapter->num_queues; i++) {
4628                 rxr = &adapter->rx_rings[i];
4629                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4630 #ifdef DEV_NETMAP
4631                 /*
4632                  * an init() while a netmap client is active must
4633                  * preserve the rx buffers passed to userspace.
4634                  * In this driver it means we adjust RDT to
4635                  * something different from next_to_refresh
4636                  * (which is not used in netmap mode).
4637                  */
4638                 if (ifp->if_capenable & IFCAP_NETMAP) {
4639                         struct netmap_adapter *na = NA(adapter->ifp);
4640                         struct netmap_kring *kring = &na->rx_rings[i];
4641                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4642
4643                         if (t >= adapter->num_rx_desc)
4644                                 t -= adapter->num_rx_desc;
4645                         else if (t < 0)
4646                                 t += adapter->num_rx_desc;
4647                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4648                 } else
4649 #endif /* DEV_NETMAP */
4650                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4651         }
4652         return;
4653 }
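/*
 * Illustrative sketch (editorial): the RETA fill above packs four 8-bit
 * queue indices into each 32-bit register, spreading the 128 table entries
 * round-robin across the queues (the 82575 wants the queue number shifted
 * up by 6). A userland model of the packing:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	union {
		uint32_t dword;
		uint8_t  bytes[4];	/* aliases the dword, host-endian */
	} reta;
	int num_queues = 4, shift = 0;	/* shift would be 6 on 82575 */

	for (int i = 0; i < 128; i++) {
		reta.bytes[i & 3] = (i % num_queues) << shift;
		if ((i & 3) == 3)	/* every 4th entry completes a dword */
			printf("RETA[%2d] = 0x%08x\n", i >> 2, reta.dword);
	}
	return (0);
}
#endif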
4654
4655 /*********************************************************************
4656  *
4657  *  Free receive rings.
4658  *
4659  **********************************************************************/
4660 static void
4661 igb_free_receive_structures(struct adapter *adapter)
4662 {
4663         struct rx_ring *rxr = adapter->rx_rings;
4664
4665         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4666                 struct lro_ctrl *lro = &rxr->lro;
4667                 igb_free_receive_buffers(rxr);
4668                 tcp_lro_free(lro);
4669                 igb_dma_free(adapter, &rxr->rxdma);
4670         }
4671
4672         free(adapter->rx_rings, M_DEVBUF);
4673 }
4674
4675 /*********************************************************************
4676  *
4677  *  Free receive ring data structures.
4678  *
4679  **********************************************************************/
4680 static void
4681 igb_free_receive_buffers(struct rx_ring *rxr)
4682 {
4683         struct adapter          *adapter = rxr->adapter;
4684         struct igb_rx_buf       *rxbuf;
4685         int i;
4686
4687         INIT_DEBUGOUT("free_receive_structures: begin");
4688
4689         /* Cleanup any existing buffers */
4690         if (rxr->rx_buffers != NULL) {
4691                 for (i = 0; i < adapter->num_rx_desc; i++) {
4692                         rxbuf = &rxr->rx_buffers[i];
4693                         if (rxbuf->m_head != NULL) {
4694                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4695                                     BUS_DMASYNC_POSTREAD);
4696                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4697                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4698                                 m_freem(rxbuf->m_head);
4699                         }
4700                         if (rxbuf->m_pack != NULL) {
4701                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4702                                     BUS_DMASYNC_POSTREAD);
4703                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4704                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4705                                 m_freem(rxbuf->m_pack);
4706                         }
4707                         rxbuf->m_head = NULL;
4708                         rxbuf->m_pack = NULL;
4709                         if (rxbuf->hmap != NULL) {
4710                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4711                                 rxbuf->hmap = NULL;
4712                         }
4713                         if (rxbuf->pmap != NULL) {
4714                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4715                                 rxbuf->pmap = NULL;
4716                         }
4717                 }
4718                 if (rxr->rx_buffers != NULL) {
4719                         free(rxr->rx_buffers, M_DEVBUF);
4720                         rxr->rx_buffers = NULL;
4721                 }
4722         }
4723
4724         if (rxr->htag != NULL) {
4725                 bus_dma_tag_destroy(rxr->htag);
4726                 rxr->htag = NULL;
4727         }
4728         if (rxr->ptag != NULL) {
4729                 bus_dma_tag_destroy(rxr->ptag);
4730                 rxr->ptag = NULL;
4731         }
4732 }
4733
4734 static __inline void
4735 igb_rx_discard(struct rx_ring *rxr, int i)
4736 {
4737         struct igb_rx_buf       *rbuf;
4738
4739         rbuf = &rxr->rx_buffers[i];
4740
4741         /* Partially received? Free the chain */
4742         if (rxr->fmp != NULL) {
4743                 rxr->fmp->m_flags |= M_PKTHDR;
4744                 m_freem(rxr->fmp);
4745                 rxr->fmp = NULL;
4746                 rxr->lmp = NULL;
4747         }
4748
4749         /*
4750         ** With advanced descriptors the writeback
4751         ** clobbers the buffer addresses, so it's easier
4752         ** to just free the existing mbufs and take
4753         ** the normal refresh path to get new buffers
4754         ** and mapping.
4755         */
4756         if (rbuf->m_head) {
4757                 m_free(rbuf->m_head);
4758                 rbuf->m_head = NULL;
4759                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4760         }
4761
4762         if (rbuf->m_pack) {
4763                 m_free(rbuf->m_pack);
4764                 rbuf->m_pack = NULL;
4765                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4766         }
4767
4768         return;
4769 }
4770
4771 static __inline void
4772 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4773 {
4774
4775         /*
4776          * At the moment LRO is only done for IPv4/TCP packets whose TCP
4777          * checksum has been computed by hardware, and which carry no VLAN
4778          * tag in the ethernet header.
4779          */
4780         if (rxr->lro_enabled &&
4781             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4782             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4783             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4784             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4785             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4786             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4787                 /*
4788                  * Send to the stack if:
4789                  *  - LRO is not enabled, or
4790                  *  - no LRO resources are available, or
4791                  *  - the LRO enqueue fails
4792                  */
4793                 if (rxr->lro.lro_cnt != 0)
4794                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4795                                 return;
4796         }
4797         IGB_RX_UNLOCK(rxr);
4798         (*ifp->if_input)(ifp, m);
4799         IGB_RX_LOCK(rxr);
4800 }
4801
4802 /*********************************************************************
4803  *
4804  *  This routine executes in interrupt context. It replenishes
4805  *  the mbufs in the descriptor ring and passes data which has
4806  *  been DMA'd into host memory up to the upper layer.
4807  *
4808  *  We loop at most count times if count is > 0, or until done if
4809  *  count < 0.
4810  *
4811  *  Return TRUE if more to clean, FALSE otherwise
4812  *********************************************************************/
4813 static bool
4814 igb_rxeof(struct igb_queue *que, int count, int *done)
4815 {
4816         struct adapter          *adapter = que->adapter;
4817         struct rx_ring          *rxr = que->rxr;
4818         struct ifnet            *ifp = adapter->ifp;
4819         struct lro_ctrl         *lro = &rxr->lro;
4820         struct lro_entry        *queued;
4821         int                     i, processed = 0, rxdone = 0;
4822         u32                     ptype, staterr = 0;
4823         union e1000_adv_rx_desc *cur;
4824
4825         IGB_RX_LOCK(rxr);
4826         /* Sync the ring. */
4827         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4828             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4829
4830 #ifdef DEV_NETMAP
4831         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4832                 IGB_RX_UNLOCK(rxr);
4833                 return (FALSE);
4834         }
4835 #endif /* DEV_NETMAP */
4836
4837         /* Main clean loop */
4838         for (i = rxr->next_to_check; count != 0;) {
4839                 struct mbuf             *sendmp, *mh, *mp;
4840                 struct igb_rx_buf       *rxbuf;
4841                 u16                     hlen, plen, hdr, vtag;
4842                 bool                    eop = FALSE;
4843  
4844                 cur = &rxr->rx_base[i];
4845                 staterr = le32toh(cur->wb.upper.status_error);
4846                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4847                         break;
4848                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4849                         break;
4850                 count--;
4851                 sendmp = mh = mp = NULL;
4852                 cur->wb.upper.status_error = 0;
4853                 rxbuf = &rxr->rx_buffers[i];
4854                 plen = le16toh(cur->wb.upper.length);
4855                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4856                 if (((adapter->hw.mac.type == e1000_i350) ||
4857                     (adapter->hw.mac.type == e1000_i354)) &&
4858                     (staterr & E1000_RXDEXT_STATERR_LB))
4859                         vtag = be16toh(cur->wb.upper.vlan);
4860                 else
4861                         vtag = le16toh(cur->wb.upper.vlan);
4862                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4863                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4864
4865                 /* Make sure all segments of a bad packet are discarded */
4866                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4867                     (rxr->discard)) {
4868                         adapter->dropped_pkts++;
4869                         ++rxr->rx_discarded;
4870                         if (!eop) /* Catch subsequent segs */
4871                                 rxr->discard = TRUE;
4872                         else
4873                                 rxr->discard = FALSE;
4874                         igb_rx_discard(rxr, i);
4875                         goto next_desc;
4876                 }
4877
4878                 /*
4879                 ** The way the hardware is configured to
4880                 ** split, it will ONLY use the header buffer
4881                 ** when header split is enabled, otherwise we
4882                 ** get normal behavior, ie, both header and
4883                 ** payload are DMA'd into the payload buffer.
4884                 **
4885                 ** The fmp test is to catch the case where a
4886                 ** packet spans multiple descriptors, in that
4887                 ** case only the first header is valid.
4888                 */
4889                 if (rxr->hdr_split && rxr->fmp == NULL) {
4890                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4891                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4892                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4893                         if (hlen > IGB_HDR_BUF)
4894                                 hlen = IGB_HDR_BUF;
4895                         mh = rxr->rx_buffers[i].m_head;
4896                         mh->m_len = hlen;
4897                         /* clear buf pointer for refresh */
4898                         rxbuf->m_head = NULL;
4899                         /*
4900                         ** Get the payload length, this
4901                         ** could be zero if it's a small
4902                         ** packet.
4903                         */
4904                         if (plen > 0) {
4905                                 mp = rxr->rx_buffers[i].m_pack;
4906                                 mp->m_len = plen;
4907                                 mh->m_next = mp;
4908                                 /* clear buf pointer */
4909                                 rxbuf->m_pack = NULL;
4910                                 rxr->rx_split_packets++;
4911                         }
4912                 } else {
4913                         /*
4914                         ** Either no header split, or a
4915                         ** secondary piece of a fragmented
4916                         ** split packet.
4917                         */
4918                         mh = rxr->rx_buffers[i].m_pack;
4919                         mh->m_len = plen;
4920                         /* clear buf info for refresh */
4921                         rxbuf->m_pack = NULL;
4922                 }
4923                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4924
4925                 ++processed; /* So we know when to refresh */
4926
4927                 /* Initial frame - setup */
4928                 if (rxr->fmp == NULL) {
4929                         mh->m_pkthdr.len = mh->m_len;
4930                         /* Save the head of the chain */
4931                         rxr->fmp = mh;
4932                         rxr->lmp = mh;
4933                         if (mp != NULL) {
4934                                 /* Add payload if split */
4935                                 mh->m_pkthdr.len += mp->m_len;
4936                                 rxr->lmp = mh->m_next;
4937                         }
4938                 } else {
4939                         /* Chain mbuf's together */
4940                         rxr->lmp->m_next = mh;
4941                         rxr->lmp = rxr->lmp->m_next;
4942                         rxr->fmp->m_pkthdr.len += mh->m_len;
4943                 }
4944
4945                 if (eop) {
4946                         rxr->fmp->m_pkthdr.rcvif = ifp;
4947                         ifp->if_ipackets++;
4948                         rxr->rx_packets++;
4949                         /* capture data for AIM */
4950                         rxr->packets++;
4951                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4952                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4953
4954                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4955                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4956
4957                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4958                             (staterr & E1000_RXD_STAT_VP) != 0) {
4959                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4960                                 rxr->fmp->m_flags |= M_VLANTAG;
4961                         }
4962 #ifndef IGB_LEGACY_TX
4963                         rxr->fmp->m_pkthdr.flowid = que->msix;
4964                         rxr->fmp->m_flags |= M_FLOWID;
4965 #endif
4966                         sendmp = rxr->fmp;
4967                         /* Make sure to set M_PKTHDR. */
4968                         sendmp->m_flags |= M_PKTHDR;
4969                         rxr->fmp = NULL;
4970                         rxr->lmp = NULL;
4971                 }
4972
4973 next_desc:
4974                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4975                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4976
4977                 /* Advance our pointers to the next descriptor. */
4978                 if (++i == adapter->num_rx_desc)
4979                         i = 0;
4980                 /*
4981                 ** Send to the stack or LRO
4982                 */
4983                 if (sendmp != NULL) {
4984                         rxr->next_to_check = i;
4985                         igb_rx_input(rxr, ifp, sendmp, ptype);
4986                         i = rxr->next_to_check;
4987                         rxdone++;
4988                 }
4989
4990                 /* Every 8 descriptors we go to refresh mbufs */
4991                 if (processed == 8) {
4992                         igb_refresh_mbufs(rxr, i);
4993                         processed = 0;
4994                 }
4995         }
4996
4997         /* Catch any remainders */
4998         if (igb_rx_unrefreshed(rxr))
4999                 igb_refresh_mbufs(rxr, i);
5000
5001         rxr->next_to_check = i;
5002
5003         /*
5004          * Flush any outstanding LRO work
5005          */
5006         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5007                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5008                 tcp_lro_flush(lro, queued);
5009         }
5010
5011         if (done != NULL)
5012                 *done += rxdone;
5013
5014         IGB_RX_UNLOCK(rxr);
5015         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5016 }
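/*
 * Illustrative sketch (editorial): igb_rxeof() assembles a frame that spans
 * several descriptors through fmp (first mbuf pointer) and lmp (last mbuf
 * pointer), growing the head's packet-header length as segments arrive. A
 * minimal userland model of that chaining, using a simplified stand-in for
 * an mbuf:
 */
#if 0
#include <stdio.h>
#include <stddef.h>

struct seg {
	struct seg	*next;
	int		len;
	int		pkt_len;	/* meaningful on the head only */
};

static void
chain(struct seg **fmp, struct seg **lmp, struct seg *m)
{
	m->next = NULL;
	if (*fmp == NULL) {		/* first segment of the frame */
		m->pkt_len = m->len;
		*fmp = *lmp = m;
	} else {			/* append and grow the total */
		(*lmp)->next = m;
		*lmp = m;
		(*fmp)->pkt_len += m->len;
	}
}

int
main(void)
{
	struct seg a = { .len = 1448 }, b = { .len = 552 };
	struct seg *fmp = NULL, *lmp = NULL;

	chain(&fmp, &lmp, &a);
	chain(&fmp, &lmp, &b);
	printf("frame length %d\n", fmp->pkt_len);	/* prints 2000 */
	return (0);
}
#endif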
5017
5018 /*********************************************************************
5019  *
5020  *  Verify that the hardware indicated that the checksum is valid.
5021  *  Inform the stack about the status of the checksum so that it
5022  *  doesn't spend time re-verifying it.
5023  *
5024  *********************************************************************/
5025 static void
5026 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5027 {
5028         u16 status = (u16)staterr;
5029         u8  errors = (u8) (staterr >> 24);
5030         int sctp;
5031
5032         /* Ignore Checksum bit is set */
5033         if (status & E1000_RXD_STAT_IXSM) {
5034                 mp->m_pkthdr.csum_flags = 0;
5035                 return;
5036         }
5037
5038         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5039             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5040                 sctp = 1;
5041         else
5042                 sctp = 0;
5043         if (status & E1000_RXD_STAT_IPCS) {
5044                 /* Did it pass? */
5045                 if (!(errors & E1000_RXD_ERR_IPE)) {
5046                         /* IP Checksum Good */
5047                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5048                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5049                 } else
5050                         mp->m_pkthdr.csum_flags = 0;
5051         }
5052
5053         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5054                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5055 #if __FreeBSD_version >= 800000
5056                 if (sctp) /* reassign */
5057                         type = CSUM_SCTP_VALID;
5058 #endif
5059                 /* Did it pass? */
5060                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5061                         mp->m_pkthdr.csum_flags |= type;
5062                         if (sctp == 0)
5063                                 mp->m_pkthdr.csum_data = htons(0xffff);
5064                 }
5065         }
5066         return;
5067 }
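/*
 * Illustrative sketch (editorial): the descriptor's status_error dword
 * keeps the status bits in its low 16 bits and the error bits in bits
 * 31:24, which is why the handler above separates them with a cast and a
 * 24-bit shift. A userland model with a made-up register value:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t staterr = 0x03000023;		    /* hypothetical value */
	uint16_t status = (uint16_t)staterr;	    /* low 16 bits */
	uint8_t  errors = (uint8_t)(staterr >> 24); /* bits 31:24 */

	printf("status=0x%04x errors=0x%02x\n", status, errors);
	return (0);
}
#endif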
5068
5069 /*
5070  * This routine is run via a vlan
5071  * config EVENT
5072  */
5073 static void
5074 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5075 {
5076         struct adapter  *adapter = ifp->if_softc;
5077         u32             index, bit;
5078
5079         if (ifp->if_softc !=  arg)   /* Not our event */
5080                 return;
5081
5082         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5083                 return;
5084
5085         IGB_CORE_LOCK(adapter);
5086         index = (vtag >> 5) & 0x7F;
5087         bit = vtag & 0x1F;
5088         adapter->shadow_vfta[index] |= (1 << bit);
5089         ++adapter->num_vlans;
5090         /* Change hw filter setting */
5091         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5092                 igb_setup_vlan_hw_support(adapter);
5093         IGB_CORE_UNLOCK(adapter);
5094 }
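/*
 * Illustrative sketch (editorial): the VFTA is 128 32-bit words covering
 * all 4096 possible VLAN IDs, so a tag maps to word (vtag >> 5) and bit
 * (vtag & 0x1F), exactly the arithmetic used above. A userland check:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint16_t vtag = 1234;			/* arbitrary example tag */
	uint32_t index = (vtag >> 5) & 0x7F;	/* which VFTA word */
	uint32_t bit = vtag & 0x1F;		/* which bit inside it */

	/* 128 words * 32 bits = 4096 VLAN IDs */
	printf("vtag %u -> vfta[%u] bit %u\n", vtag, index, bit);
	return (0);
}
#endif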
5095
5096 /*
5097  * This routine is run via a vlan
5098  * unconfig EVENT
5099  */
5100 static void
5101 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5102 {
5103         struct adapter  *adapter = ifp->if_softc;
5104         u32             index, bit;
5105
5106         if (ifp->if_softc !=  arg)
5107                 return;
5108
5109         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5110                 return;
5111
5112         IGB_CORE_LOCK(adapter);
5113         index = (vtag >> 5) & 0x7F;
5114         bit = vtag & 0x1F;
5115         adapter->shadow_vfta[index] &= ~(1 << bit);
5116         --adapter->num_vlans;
5117         /* Change hw filter setting */
5118         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5119                 igb_setup_vlan_hw_support(adapter);
5120         IGB_CORE_UNLOCK(adapter);
5121 }
5122
5123 static void
5124 igb_setup_vlan_hw_support(struct adapter *adapter)
5125 {
5126         struct e1000_hw *hw = &adapter->hw;
5127         struct ifnet    *ifp = adapter->ifp;
5128         u32             reg;
5129
5130         if (adapter->vf_ifp) {
5131                 e1000_rlpml_set_vf(hw,
5132                     adapter->max_frame_size + VLAN_TAG_SIZE);
5133                 return;
5134         }
5135
5136         reg = E1000_READ_REG(hw, E1000_CTRL);
5137         reg |= E1000_CTRL_VME;
5138         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5139
5140         /* Enable the Filter Table */
5141         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5142                 reg = E1000_READ_REG(hw, E1000_RCTL);
5143                 reg &= ~E1000_RCTL_CFIEN;
5144                 reg |= E1000_RCTL_VFE;
5145                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5146         }
5147
5148         /* Update the frame size */
5149         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5150             adapter->max_frame_size + VLAN_TAG_SIZE);
5151
5152         /* Don't bother with table if no vlans */
5153         if ((adapter->num_vlans == 0) ||
5154             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5155                 return;
5156         /*
5157         ** A soft reset zeroes out the VFTA, so
5158         ** we need to repopulate it now.
5159         */
5160         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5161                 if (adapter->shadow_vfta[i] != 0) {
5162                         if (adapter->vf_ifp)
5163                                 e1000_vfta_set_vf(hw,
5164                                     adapter->shadow_vfta[i], TRUE);
5165                         else
5166                                 e1000_write_vfta(hw,
5167                                     i, adapter->shadow_vfta[i]);
5168                 }
5169 }
5170
5171 static void
5172 igb_enable_intr(struct adapter *adapter)
5173 {
5174         /* With RSS set up what to auto clear */
5175         if (adapter->msix_mem) {
5176                 u32 mask = (adapter->que_mask | adapter->link_mask);
5177                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5178                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5179                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5180                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5181                     E1000_IMS_LSC);
5182         } else {
5183                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5184                     IMS_ENABLE_MASK);
5185         }
5186         E1000_WRITE_FLUSH(&adapter->hw);
5187
5188         return;
5189 }
5190
5191 static void
5192 igb_disable_intr(struct adapter *adapter)
5193 {
5194         if (adapter->msix_mem) {
5195                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5196                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5197         } 
5198         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5199         E1000_WRITE_FLUSH(&adapter->hw);
5200         return;
5201 }
5202
5203 /*
5204  * Bit of a misnomer: what this really means is
5205  * to enable OS management of the system, i.e.
5206  * to disable the special hardware management features.
5207  */
5208 static void
5209 igb_init_manageability(struct adapter *adapter)
5210 {
5211         if (adapter->has_manage) {
5212                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5213                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5214
5215                 /* disable hardware interception of ARP */
5216                 manc &= ~(E1000_MANC_ARP_EN);
5217
5218                 /* enable receiving management packets to the host */
5219                 manc |= E1000_MANC_EN_MNG2HOST;
5220                 manc2h |= 1 << 5;  /* Mng Port 623 */
5221                 manc2h |= 1 << 6;  /* Mng Port 664 */
5222                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5223                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5224         }
5225 }
5226
5227 /*
5228  * Give control back to hardware management
5229  * controller if there is one.
5230  */
5231 static void
5232 igb_release_manageability(struct adapter *adapter)
5233 {
5234         if (adapter->has_manage) {
5235                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5236
5237                 /* re-enable hardware interception of ARP */
5238                 manc |= E1000_MANC_ARP_EN;
5239                 manc &= ~E1000_MANC_EN_MNG2HOST;
5240
5241                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5242         }
5243 }
5244
5245 /*
5246  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5247  * For ASF and Pass Through versions of f/w this means that
5248  * the driver is loaded. 
5249  *
5250  */
5251 static void
5252 igb_get_hw_control(struct adapter *adapter)
5253 {
5254         u32 ctrl_ext;
5255
5256         if (adapter->vf_ifp)
5257                 return;
5258
5259         /* Let firmware know the driver has taken over */
5260         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5261         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5262             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5263 }
5264
5265 /*
5266  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5267  * For ASF and Pass Through versions of f/w this means that the
5268  * driver is no longer loaded.
5269  *
5270  */
5271 static void
5272 igb_release_hw_control(struct adapter *adapter)
5273 {
5274         u32 ctrl_ext;
5275
5276         if (adapter->vf_ifp)
5277                 return;
5278
5279         /* Let firmware take over control of h/w */
5280         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5281         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5282             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5283 }
5284
5285 static int
5286 igb_is_valid_ether_addr(uint8_t *addr)
5287 {
5288         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5289
5290         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5291                 return (FALSE);
5292         }
5293
5294         return (TRUE);
5295 }
5296
5297
5298 /*
5299  * Enable PCI Wake On Lan capability
5300  */
5301 static void
5302 igb_enable_wakeup(device_t dev)
5303 {
5304         u16     cap, status;
5305         u8      id;
5306
5307         /* First find the capabilities pointer */
5308         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5309         /* Read the PM Capabilities */
5310         id = pci_read_config(dev, cap, 1);
5311         if (id != PCIY_PMG)     /* Something wrong */
5312                 return;
5313         /* OK, we have the power capabilities, so
5314            now get the status register */
5315         cap += PCIR_POWER_STATUS;
5316         status = pci_read_config(dev, cap, 2);
5317         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5318         pci_write_config(dev, cap, status, 2);
5319         return;
5320 }
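/*
 * Editorial note: the routine above assumes the power-management capability
 * is the first entry in the PCI capability list. A sketch of a more
 * defensive variant using pci_find_cap(9) to locate it wherever it sits;
 * this is an untested illustration, not a drop-in replacement:
 */
#if 0
static void
igb_enable_wakeup_alt(device_t dev)
{
	int	pmc;
	u16	status;

	/* Walk the capability list for the PM capability */
	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
		return;		/* no PM capability: nothing to do */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
}
#endif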
5321
5322 static void
5323 igb_led_func(void *arg, int onoff)
5324 {
5325         struct adapter  *adapter = arg;
5326
5327         IGB_CORE_LOCK(adapter);
5328         if (onoff) {
5329                 e1000_setup_led(&adapter->hw);
5330                 e1000_led_on(&adapter->hw);
5331         } else {
5332                 e1000_led_off(&adapter->hw);
5333                 e1000_cleanup_led(&adapter->hw);
5334         }
5335         IGB_CORE_UNLOCK(adapter);
5336 }
5337
5338 /**********************************************************************
5339  *
5340  *  Update the board statistics counters.
5341  *
5342  **********************************************************************/
5343 static void
5344 igb_update_stats_counters(struct adapter *adapter)
5345 {
5346         struct ifnet            *ifp;
5347         struct e1000_hw         *hw = &adapter->hw;
5348         struct e1000_hw_stats   *stats;
5349
5350         /* 
5351         ** The virtual function adapter has only a
5352         ** small, controlled set of stats; update
5353         ** only those and return.
5354         */
5355         if (adapter->vf_ifp) {
5356                 igb_update_vf_stats_counters(adapter);
5357                 return;
5358         }
5359
5360         stats = (struct e1000_hw_stats  *)adapter->stats;
5361
5362         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5363            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5364                 stats->symerrs +=
5365                     E1000_READ_REG(hw, E1000_SYMERRS);
5366                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5367         }
5368
5369         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5370         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5371         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5372         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5373
5374         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5375         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5376         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5377         stats->dc += E1000_READ_REG(hw, E1000_DC);
5378         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5379         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5380         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5381         /*
5382         ** For watchdog management we need to know if we have been
5383         ** paused during the last interval, so capture that here.
5384         */ 
5385         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5386         stats->xoffrxc += adapter->pause_frames;
5387         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5388         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5389         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5390         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5391         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5392         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5393         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5394         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5395         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5396         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5397         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5398         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5399
5400         /* For the 64-bit byte counters the low dword must be read first. */
5401         /* Both registers clear on the read of the high dword */
5402
5403         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5404             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5405         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5406             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5407
5408         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5409         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5410         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5411         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5412         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5413
5414         stats->tor += E1000_READ_REG(hw, E1000_TORH);
5415         stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5416
5417         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5418         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5419         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5420         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5421         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5422         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5423         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5424         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5425         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5426         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5427
5428         /* Interrupt Counts */
5429
5430         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5431         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5432         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5433         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5434         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5435         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5436         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5437         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5438         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5439
5440         /* Host to Card Statistics */
5441
5442         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5443         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5444         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5445         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5446         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5447         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5448         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5449         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5450             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5451         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5452             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5453         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5454         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5455         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5456
5457         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5458         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5459         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5460         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5461         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5462         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5463
5464         ifp = adapter->ifp;
5465         ifp->if_collisions = stats->colc;
5466
5467         /* Rx Errors */
5468         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5469             stats->crcerrs + stats->algnerrc +
5470             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5471
5472         /* Tx Errors */
5473         ifp->if_oerrors = stats->ecol +
5474             stats->latecol + adapter->watchdog_events;
5475
5476         /* Driver specific counters */
5477         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5478         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5479         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5480         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5481         adapter->packet_buf_alloc_tx =
5482             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5483         adapter->packet_buf_alloc_rx =
5484             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5485 }
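/*
 * Illustrative sketch (editorial): the GORC/GOTC reads above combine a low
 * and a high 32-bit register into one 64-bit count; the low half is read
 * first because reading the high half clears the pair. In userland form,
 * with hypothetical register values:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t lo = 0xdeadbeef;	/* hypothetical GORCL read */
	uint32_t hi = 0x2;		/* hypothetical GORCH read */
	uint64_t octets = (uint64_t)lo + ((uint64_t)hi << 32);

	printf("octets = %llu\n", (unsigned long long)octets);
	return (0);
}
#endif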
5486
5487
5488 /**********************************************************************
5489  *
5490  *  Initialize the VF board statistics counters.
5491  *
5492  **********************************************************************/
5493 static void
5494 igb_vf_init_stats(struct adapter *adapter)
5495 {
5496         struct e1000_hw *hw = &adapter->hw;
5497         struct e1000_vf_stats   *stats;
5498
5499         stats = (struct e1000_vf_stats  *)adapter->stats;
5500         if (stats == NULL)
5501                 return;
5502         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5503         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5504         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5505         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5506         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5507 }
5508  
5509 /**********************************************************************
5510  *
5511  *  Update the VF board statistics counters.
5512  *
5513  **********************************************************************/
5514 static void
5515 igb_update_vf_stats_counters(struct adapter *adapter)
5516 {
5517         struct e1000_hw *hw = &adapter->hw;
5518         struct e1000_vf_stats   *stats;
5519
5520         if (adapter->link_speed == 0)
5521                 return;
5522
5523         stats = (struct e1000_vf_stats  *)adapter->stats;
5524
5525         UPDATE_VF_REG(E1000_VFGPRC,
5526             stats->last_gprc, stats->gprc);
5527         UPDATE_VF_REG(E1000_VFGORC,
5528             stats->last_gorc, stats->gorc);
5529         UPDATE_VF_REG(E1000_VFGPTC,
5530             stats->last_gptc, stats->gptc);
5531         UPDATE_VF_REG(E1000_VFGOTC,
5532             stats->last_gotc, stats->gotc);
5533         UPDATE_VF_REG(E1000_VFMPRC,
5534             stats->last_mprc, stats->mprc);
5535 }
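/*
 * Illustrative sketch (editorial): UPDATE_VF_REG() accumulates a
 * free-running 32-bit hardware counter into a 64-bit software total,
 * detecting wraparound by comparing against the previous sample. The
 * macro itself is defined in if_igb.h; this userland model is an
 * assumption-labeled reconstruction of the pattern:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

static void
update_counter(uint32_t sample, uint32_t *last, uint64_t *total)
{
	if (sample < *last)		/* the 32-bit counter wrapped */
		*total += (1ULL << 32);
	*total = (*total & 0xFFFFFFFF00000000ULL) | sample;
	*last = sample;
}

int
main(void)
{
	uint32_t last = 0xFFFFFFF0;
	uint64_t total = 0xFFFFFFF0;

	update_counter(0x10, &last, &total);	/* wrapped past 2^32 */
	printf("total = 0x%llx\n", (unsigned long long)total);
	return (0);
}
#endif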
5536
5537 /* Export a single 32-bit register via a read-only sysctl. */
5538 static int
5539 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5540 {
5541         struct adapter *adapter;
5542         u_int val;
5543
5544         adapter = oidp->oid_arg1;
5545         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5546         return (sysctl_handle_int(oidp, &val, 0, req));
5547 }
5548
5549 /*
5550 **  Tunable interrupt rate handler
5551 */
5552 static int
5553 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5554 {
5555         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5556         int                     error;
5557         u32                     reg, usec, rate;
5558                         
5559         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5560         usec = ((reg & 0x7FFC) >> 2);
5561         if (usec > 0)
5562                 rate = 1000000 / usec;
5563         else
5564                 rate = 0;
5565         error = sysctl_handle_int(oidp, &rate, 0, req);
5566         if (error || !req->newptr)
5567                 return (error);
5568         return (0);
5569 }
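/*
 * Worked example (illustrative): if EITR holds 0x1F4, the interval
 * field is (0x1F4 & 0x7FFC) >> 2 = 125 usec, so the handler above
 * reports 1000000 / 125 = 8000 interrupts per second. The node is
 * registered read-only below, and the handler ignores any new value
 * in any case.
 */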
5570
5571 /*
5572  * Add sysctl variables, one per statistic, to the system.
5573  */
5574 static void
5575 igb_add_hw_stats(struct adapter *adapter)
5576 {
5577         device_t dev = adapter->dev;
5578
5579         struct tx_ring *txr = adapter->tx_rings;
5580         struct rx_ring *rxr = adapter->rx_rings;
5581
5582         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5583         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5584         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5585         struct e1000_hw_stats *stats = adapter->stats;
5586
5587         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5588         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5589
5590 #define QUEUE_NAME_LEN 32
5591         char namebuf[QUEUE_NAME_LEN];
5592
5593         /* Driver Statistics */
5594         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5595                         CTLFLAG_RD, &adapter->link_irq, 0,
5596                         "Link MSIX IRQ Handled");
5597         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5598                         CTLFLAG_RD, &adapter->dropped_pkts,
5599                         "Driver dropped packets");
5600         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5601                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5602                         "Driver tx dma failure in xmit");
5603         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5604                         CTLFLAG_RD, &adapter->rx_overruns,
5605                         "RX overruns");
5606         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5607                         CTLFLAG_RD, &adapter->watchdog_events,
5608                         "Watchdog timeouts");
5609
5610         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5611                         CTLFLAG_RD, &adapter->device_control,
5612                         "Device Control Register");
5613         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5614                         CTLFLAG_RD, &adapter->rx_control,
5615                         "Receiver Control Register");
5616         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5617                         CTLFLAG_RD, &adapter->int_mask,
5618                         "Interrupt Mask");
5619         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5620                         CTLFLAG_RD, &adapter->eint_mask,
5621                         "Extended Interrupt Mask");
5622         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5623                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5624                         "Transmit Buffer Packet Allocation");
5625         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5626                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5627                         "Receive Buffer Packet Allocation");
5628         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5629                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5630                         "Flow Control High Watermark");
5631         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5632                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5633                         "Flow Control Low Watermark");
5634
5635         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5636                 struct lro_ctrl *lro = &rxr->lro;
5637
5638                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5639                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5640                                             CTLFLAG_RD, NULL, "Queue Name");
5641                 queue_list = SYSCTL_CHILDREN(queue_node);
5642
5643                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5644                                 CTLFLAG_RD, &adapter->queues[i],
5645                                 sizeof(adapter->queues[i]),
5646                                 igb_sysctl_interrupt_rate_handler,
5647                                 "IU", "Interrupt Rate");
5648
5649                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5650                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5651                                 igb_sysctl_reg_handler, "IU",
5652                                 "Transmit Descriptor Head");
5653                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5654                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5655                                 igb_sysctl_reg_handler, "IU",
5656                                 "Transmit Descriptor Tail");
5657                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5658                                 CTLFLAG_RD, &txr->no_desc_avail,
5659                                 "Queue No Descriptor Available");
5660                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5661                                 CTLFLAG_RD, &txr->total_packets,
5662                                 "Queue Packets Transmitted");
5663
5664                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5665                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5666                                 igb_sysctl_reg_handler, "IU",
5667                                 "Receive Descriptor Head");
5668                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5669                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5670                                 igb_sysctl_reg_handler, "IU",
5671                                 "Receive Descriptor Tail");
5672                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5673                                 CTLFLAG_RD, &rxr->rx_packets,
5674                                 "Queue Packets Received");
5675                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5676                                 CTLFLAG_RD, &rxr->rx_bytes,
5677                                 "Queue Bytes Received");
5678                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5679                                 CTLFLAG_RD, &lro->lro_queued, 0,
5680                                 "LRO Queued");
5681                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5682                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5683                                 "LRO Flushed");
5684         }
5685
5686         /* MAC stats get their own sub node */
5687
5688         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5689                                     CTLFLAG_RD, NULL, "MAC Statistics");
5690         stat_list = SYSCTL_CHILDREN(stat_node);
5691
5692         /*
5693         ** VF adapter has a very limited set of stats
5694         ** since it's not managing the metal, so to speak.
5695         */
5696         if (adapter->vf_ifp) {
5697                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5698                                 CTLFLAG_RD, &stats->gprc,
5699                                 "Good Packets Received");
5700                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5701                                 CTLFLAG_RD, &stats->gptc,
5702                                 "Good Packets Transmitted");
5703                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5704                                 CTLFLAG_RD, &stats->gorc,
5705                                 "Good Octets Received");
5706                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5707                                 CTLFLAG_RD, &stats->gotc,
5708                                 "Good Octets Transmitted");
5709                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5710                                 CTLFLAG_RD, &stats->mprc,
5711                                 "Multicast Packets Received");
5712                 return;
5713         }
5714
5715         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5716                         CTLFLAG_RD, &stats->ecol,
5717                         "Excessive collisions");
5718         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5719                         CTLFLAG_RD, &stats->scc,
5720                         "Single collisions");
5721         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5722                         CTLFLAG_RD, &stats->mcc,
5723                         "Multiple collisions");
5724         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5725                         CTLFLAG_RD, &stats->latecol,
5726                         "Late collisions");
5727         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5728                         CTLFLAG_RD, &stats->colc,
5729                         "Collision Count");
5730         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5731                         CTLFLAG_RD, &stats->symerrs,
5732                         "Symbol Errors");
5733         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5734                         CTLFLAG_RD, &stats->sec,
5735                         "Sequence Errors");
5736         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5737                         CTLFLAG_RD, &stats->dc,
5738                         "Defer Count");
5739         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5740                         CTLFLAG_RD, &stats->mpc,
5741                         "Missed Packets");
5742         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5743                         CTLFLAG_RD, &stats->rnbc,
5744                         "Receive No Buffers");
5745         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5746                         CTLFLAG_RD, &stats->ruc,
5747                         "Receive Undersize");
5748         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5749                         CTLFLAG_RD, &stats->rfc,
5750                         "Fragmented Packets Received");
5751         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5752                         CTLFLAG_RD, &stats->roc,
5753                         "Oversized Packets Received");
5754         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5755                         CTLFLAG_RD, &stats->rjc,
5756                         "Received Jabber");
5757         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5758                         CTLFLAG_RD, &stats->rxerrc,
5759                         "Receive Errors");
5760         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5761                         CTLFLAG_RD, &stats->crcerrs,
5762                         "CRC errors");
5763         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5764                         CTLFLAG_RD, &stats->algnerrc,
5765                         "Alignment Errors");
5766         /* On 82575 these are collision counts */
5767         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5768                         CTLFLAG_RD, &stats->cexterr,
5769                         "Collision/Carrier extension errors");
5770         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5771                         CTLFLAG_RD, &stats->xonrxc,
5772                         "XON Received");
5773         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5774                         CTLFLAG_RD, &stats->xontxc,
5775                         "XON Transmitted");
5776         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5777                         CTLFLAG_RD, &stats->xoffrxc,
5778                         "XOFF Received");
5779         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5780                         CTLFLAG_RD, &stats->xofftxc,
5781                         "XOFF Transmitted");
5782         /* Packet Reception Stats */
5783         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5784                         CTLFLAG_RD, &stats->tpr,
5785                         "Total Packets Received");
5786         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5787                         CTLFLAG_RD, &stats->gprc,
5788                         "Good Packets Received");
5789         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5790                         CTLFLAG_RD, &stats->bprc,
5791                         "Broadcast Packets Received");
5792         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5793                         CTLFLAG_RD, &stats->mprc,
5794                         "Multicast Packets Received");
5795         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5796                         CTLFLAG_RD, &stats->prc64,
5797                         "64 byte frames received");
5798         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5799                         CTLFLAG_RD, &stats->prc127,
5800                         "65-127 byte frames received");
5801         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5802                         CTLFLAG_RD, &stats->prc255,
5803                         "128-255 byte frames received");
5804         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5805                         CTLFLAG_RD, &stats->prc511,
5806                         "256-511 byte frames received");
5807         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5808                         CTLFLAG_RD, &stats->prc1023,
5809                         "512-1023 byte frames received");
5810         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5811                         CTLFLAG_RD, &stats->prc1522,
5812                         "1024-1522 byte frames received");
5813         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5814                         CTLFLAG_RD, &stats->gorc, 
5815                         "Good Octets Received"); 
5816
5817         /* Packet Transmission Stats */
5818         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5819                         CTLFLAG_RD, &stats->gotc, 
5820                         "Good Octets Transmitted"); 
5821         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5822                         CTLFLAG_RD, &stats->tpt,
5823                         "Total Packets Transmitted");
5824         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5825                         CTLFLAG_RD, &stats->gptc,
5826                         "Good Packets Transmitted");
5827         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5828                         CTLFLAG_RD, &stats->bptc,
5829                         "Broadcast Packets Transmitted");
5830         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5831                         CTLFLAG_RD, &stats->mptc,
5832                         "Multicast Packets Transmitted");
5833         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5834                         CTLFLAG_RD, &stats->ptc64,
5835                         "64 byte frames transmitted");
5836         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5837                         CTLFLAG_RD, &stats->ptc127,
5838                         "65-127 byte frames transmitted");
5839         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5840                         CTLFLAG_RD, &stats->ptc255,
5841                         "128-255 byte frames transmitted");
5842         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5843                         CTLFLAG_RD, &stats->ptc511,
5844                         "256-511 byte frames transmitted");
5845         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5846                         CTLFLAG_RD, &stats->ptc1023,
5847                         "512-1023 byte frames transmitted");
5848         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5849                         CTLFLAG_RD, &stats->ptc1522,
5850                         "1024-1522 byte frames transmitted");
5851         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5852                         CTLFLAG_RD, &stats->tsctc,
5853                         "TSO Contexts Transmitted");
5854         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5855                         CTLFLAG_RD, &stats->tsctfc,
5856                         "TSO Contexts Failed");
5857
5858
5859         /* Interrupt Stats */
5860
5861         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5862                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5863         int_list = SYSCTL_CHILDREN(int_node);
5864
5865         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5866                         CTLFLAG_RD, &stats->iac,
5867                         "Interrupt Assertion Count");
5868
5869         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5870                         CTLFLAG_RD, &stats->icrxptc,
5871                         "Interrupt Cause Rx Pkt Timer Expire Count");
5872
5873         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5874                         CTLFLAG_RD, &stats->icrxatc,
5875                         "Interrupt Cause Rx Abs Timer Expire Count");
5876
5877         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5878                         CTLFLAG_RD, &stats->ictxptc,
5879                         "Interrupt Cause Tx Pkt Timer Expire Count");
5880
5881         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5882                         CTLFLAG_RD, &stats->ictxatc,
5883                         "Interrupt Cause Tx Abs Timer Expire Count");
5884
5885         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5886                         CTLFLAG_RD, &stats->ictxqec,
5887                         "Interrupt Cause Tx Queue Empty Count");
5888
5889         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5890                         CTLFLAG_RD, &stats->ictxqmtc,
5891                         "Interrupt Cause Tx Queue Min Thresh Count");
5892
5893         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5894                         CTLFLAG_RD, &stats->icrxdmtc,
5895                         "Interrupt Cause Rx Desc Min Thresh Count");
5896
5897         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5898                         CTLFLAG_RD, &stats->icrxoc,
5899                         "Interrupt Cause Receiver Overrun Count");
5900
5901         /* Host to Card Stats */
5902
5903         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5904                                     CTLFLAG_RD, NULL, 
5905                                     "Host to Card Statistics");
5906
5907         host_list = SYSCTL_CHILDREN(host_node);
5908
5909         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5910                         CTLFLAG_RD, &stats->cbtmpc,
5911                         "Circuit Breaker Tx Packet Count");
5912
5913         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5914                         CTLFLAG_RD, &stats->htdpmc,
5915                         "Host Transmit Discarded Packets");
5916
5917         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5918                         CTLFLAG_RD, &stats->rpthc,
5919                         "Rx Packets To Host");
5920
5921         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5922                         CTLFLAG_RD, &stats->cbrmpc,
5923                         "Circuit Breaker Rx Packet Count");
5924
5925         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5926                         CTLFLAG_RD, &stats->cbrdpc,
5927                         "Circuit Breaker Rx Dropped Count");
5928
5929         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5930                         CTLFLAG_RD, &stats->hgptc,
5931                         "Host Good Packets Tx Count");
5932
5933         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5934                         CTLFLAG_RD, &stats->htcbdpc,
5935                         "Host Tx Circuit Breaker Dropped Count");
5936
5937         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5938                         CTLFLAG_RD, &stats->hgorc,
5939                         "Host Good Octets Received Count");
5940
5941         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5942                         CTLFLAG_RD, &stats->hgotc,
5943                         "Host Good Octets Transmit Count");
5944
5945         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5946                         CTLFLAG_RD, &stats->lenerrs,
5947                         "Length Errors");
5948
5949         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5950                         CTLFLAG_RD, &stats->scvpc,
5951                         "SerDes/SGMII Code Violation Pkt Count");
5952
5953         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5954                         CTLFLAG_RD, &stats->hrmpc,
5955                         "Header Redirection Missed Packet Count");
5956 }
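/*
 * Usage sketch (illustrative; the unit number is an assumption):
 * the counters registered above surface under the device's sysctl
 * tree, e.g. from the shell:
 *
 *      sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *
 * or from a userland C program via sysctlbyname(3):
 *
 *      #include <sys/types.h>
 *      #include <sys/sysctl.h>
 *      #include <stdint.h>
 *      #include <stdio.h>
 *
 *      uint64_t gprc;
 *      size_t len = sizeof(gprc);
 *      if (sysctlbyname("dev.igb.0.mac_stats.good_pkts_recvd",
 *          &gprc, &len, NULL, 0) == 0)
 *              printf("good packets received: %ju\n",
 *                  (uintmax_t)gprc);
 *
 * The node names match the strings passed to SYSCTL_ADD_* above.
 */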
5957
5958
5959 /**********************************************************************
5960  *
5961  *  This routine provides a way to dump out the adapter eeprom,
5962  *  often a useful debug/service tool. It dumps only the first
5963  *  32 words; the data that matters lies within that range.
5964  *
5965  **********************************************************************/
5966 static int
5967 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5968 {
5969         struct adapter *adapter;
5970         int error;
5971         int result;
5972
5973         result = -1;
5974         error = sysctl_handle_int(oidp, &result, 0, req);
5975
5976         if (error || !req->newptr)
5977                 return (error);
5978
5979         /*
5980          * This value will cause a hex dump of the
5981          * first 32 16-bit words of the EEPROM to
5982          * the screen.
5983          */
5984         if (result == 1) {
5985                 adapter = (struct adapter *)arg1;
5986                 igb_print_nvm_info(adapter);
5987         }
5988
5989         return (error);
5990 }
5991
5992 static void
5993 igb_print_nvm_info(struct adapter *adapter)
5994 {
5995         u16     eeprom_data;
5996         int     i, j, row = 0;
5997
5998         /* It's a bit crude, but it gets the job done */
5999         printf("\nInterface EEPROM Dump:\n");
6000         printf("Offset\n0x0000  ");
6001         for (i = 0, j = 0; i < 32; i++, j++) {
6002                 if (j == 8) { /* Make the offset block */
6003                         j = 0; ++row;
6004                         printf("\n0x00%x0  ", row);
6005                 }
6006                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6007                 printf("%04x ", eeprom_data);
6008         }
6009         printf("\n");
6010 }
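/*
 * Illustrative output (the word values are placeholders): eight
 * 16-bit words per row, with the printed offset advancing 0x10
 * bytes per row.
 *
 *      Interface EEPROM Dump:
 *      Offset
 *      0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *      0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *      0x0020  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *      0x0030  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 */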
6011
6012 static void
6013 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6014         const char *description, int *limit, int value)
6015 {
6016         *limit = value;
6017         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6018             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6019             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
6020 }
6021
6022 /*
6023 ** Set flow control using sysctl:
6024 ** Flow control values:
6025 **      0 - off
6026 **      1 - rx pause
6027 **      2 - tx pause
6028 **      3 - full
6029 */
6030 static int
6031 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6032 {
6033         struct adapter  *adapter = (struct adapter *) arg1;
6034         int             error;
6035         int             input = adapter->fc; /* report the current mode */
6036
6037         error = sysctl_handle_int(oidp, &input, 0, req);
6038
6039         if ((error) || (req->newptr == NULL))
6040                 return (error);
6041
6042         switch (input) {
6043                 case e1000_fc_rx_pause:
6044                 case e1000_fc_tx_pause:
6045                 case e1000_fc_full:
6046                 case e1000_fc_none:
6047                         adapter->hw.fc.requested_mode = input;
6048                         adapter->fc = input;
6049                         break;
6050                 default:
6051                         /* Do nothing */
6052                         return (error);
6053         }
6054
6055         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6056         e1000_force_mac_fc(&adapter->hw);
6057         return (error);
6058 }
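/*
 * Usage sketch (illustrative; assumes the handler is registered as
 * dev.igb.<unit>.fc elsewhere in the driver):
 *
 *      sysctl dev.igb.0.fc=3           # request full flow control
 *      sysctl dev.igb.0.fc=0           # disable flow control
 *
 * Only the four e1000_fc_* values handled above are accepted; any
 * other input is silently ignored.
 */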
6059
6060 /*
6061 ** Manage DMA Coalesce:
6062 ** Control values:
6063 **      0/1 - off/on
6064 **      Legal timer values are:
6065 **      250, 500, and 1000-10000 in steps of 1000
6066 */
6067 static int
6068 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6069 {
6070         struct adapter *adapter = (struct adapter *) arg1;
6071         int             error;
6072
6073         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6074
6075         if ((error) || (req->newptr == NULL))
6076                 return (error);
6077
6078         switch (adapter->dmac) {
6079                 case 0:
6080                         /* Disabling */
6081                         break;
6082                 case 1: /* Just enable and use default */
6083                         adapter->dmac = 1000;
6084                         break;
6085                 case 250:
6086                 case 500:
6087                 case 1000:
6088                 case 2000:
6089                 case 3000:
6090                 case 4000:
6091                 case 5000:
6092                 case 6000:
6093                 case 7000:
6094                 case 8000:
6095                 case 9000:
6096                 case 10000:
6097                         /* Legal values - allow */
6098                         break;
6099                 default:
6100                         /* Do nothing, illegal value */
6101                         adapter->dmac = 0;
6102                         return (EINVAL);
6103         }
6104         /* Reinit the interface */
6105         igb_init(adapter);
6106         return (error);
6107 }
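/*
 * Usage sketch (illustrative; assumes the handler is registered as
 * dev.igb.<unit>.dmac elsewhere in the driver):
 *
 *      sysctl dev.igb.0.dmac=1         # enable with the default timer
 *      sysctl dev.igb.0.dmac=250       # enable with the smallest timer
 *      sysctl dev.igb.0.dmac=0         # disable
 *
 * Illegal values are rejected with EINVAL and leave DMA coalescing
 * off; a legal write reinitializes the interface.
 */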
6108
6109 /*
6110 ** Manage Energy Efficient Ethernet:
6111 ** Control values:
6112 **     0 - EEE enabled, 1 - EEE disabled
6113 */
6114 static int
6115 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6116 {
6117         struct adapter  *adapter = (struct adapter *) arg1;
6118         int             error, value;
6119
6120         value = adapter->hw.dev_spec._82575.eee_disable;
6121         error = sysctl_handle_int(oidp, &value, 0, req);
6122         if (error || req->newptr == NULL)
6123                 return (error);
6124         IGB_CORE_LOCK(adapter);
6125         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6126         igb_init_locked(adapter);
6127         IGB_CORE_UNLOCK(adapter);
6128         return (0);
6129 }
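/*
 * Usage sketch (illustrative; assumes the handler is registered as
 * dev.igb.<unit>.eee_disabled elsewhere in the driver):
 *
 *      sysctl dev.igb.0.eee_disabled=1         # turn EEE off
 *      sysctl dev.igb.0.eee_disabled=0         # turn EEE back on
 *
 * Each write reinitializes the interface under the core lock so the
 * new setting takes effect immediately.
 */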