/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifndef IGB_LEGACY_TX
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.4.0";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I354_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      igb_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
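/*
 * Both tunables are read-only at runtime (CTLFLAG_RDTUN) and are
 * normally set from /boot/loader.conf, e.g. (illustrative values):
 *
 *      hw.igb.rxd=2048
 *      hw.igb.txd=2048
 *
 * Out-of-range or misaligned values are caught in igb_attach(),
 * which warns and falls back to IGB_DEFAULT_RXD/IGB_DEFAULT_TXD.
 */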

/*
** AIM: Adaptive Interrupt Moderation.
** The interrupt rate is varied over time
** based on the traffic seen on each
** interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be dma'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf and so use no cluster at all. It is a
** very workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");
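/*
 * This limit is handed to igb_rxeof() by igb_handle_que() as the
 * per-pass descriptor budget; -1 removes the cap so a queue is
 * drained completely in one pass.
 */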

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.
         * It must not exceed the hardware maximum, and the ring size
         * in bytes must be a multiple of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;
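        /*
         * Worked example (assuming the usual 16-byte descriptors and
         * 128-byte IGB_DBA_ALIGN): the modulo test above requires a
         * count that is a multiple of 8, so 1024 descriptors (16384
         * bytes) pass, while 1020 (16320 bytes) would fall back to
         * the default with the warning printed above.
         */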

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard Ethernet-sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                        if (adapter->hw.mac.type == e1000_i354)
                                e1000_set_eee_i354(&adapter->hw);
                        else
                                e1000_set_eee_i350(&adapter->hw);
                }
        }

        /*
        ** Start from a known state: this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

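        /*
         * adapter->wol carries the WUFC wake filter bits (by default
         * E1000_WUFC_MAG, set in igb_attach() when the EEPROM APME
         * bit is on); arm PME and those filters before sleeping.
         */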
        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack; it always uses tx[0] and spins for its lock.
 * Should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
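        /*
         * Illustrative mapping: with 4 queues, a packet carrying
         * flowid 0x12345678 always lands on queue 0x12345678 % 4
         * == 0, keeping each flow on a single TX ring; unhashed
         * traffic is simply spread by the current CPU.
         */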
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IGB_TX_TRYLOCK(txr)) {
                igb_mq_start_locked(ifp, txr);
                IGB_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
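        /*
         * drbr_peek() leaves the mbuf on the ring: it is consumed
         * with drbr_advance() once igb_xmit() succeeds, put back
         * with drbr_putback() if it must be retried, and advanced
         * past only when the transmit path already freed it.
         */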
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
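                /*
                 * 9234 bytes is the jumbo-frame ceiling used here, so
                 * the largest MTU this check accepts is 9234 -
                 * ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4) = 9216.
                 */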
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT(
                    "ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
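                                        /*
                                         * XOR of the old and new flags
                                         * isolates just-changed bits;
                                         * if PROMISC or ALLMULTI
                                         * toggled, reprogram the RX
                                         * filters without a full
                                         * reinit.
                                         */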
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT(
                    "ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO6) {
                        ifp->if_capenable ^= IFCAP_TSO6;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it as the
 *  init entry point in the network interface structure, and the
 *  driver uses it as a hw/sw initialization routine to get back
 *  to a consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
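        /*
         * MCLBYTES is 2048 and MJUMPAGESIZE is one page (4096 on
         * most platforms), so each threshold below selects the
         * smallest cluster zone that still holds a full frame;
         * anything larger uses 9k (MJUM9BYTES) clusters.
         */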
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling; make sure
         * they are off otherwise.
         */
1374         if (ifp->if_capenable & IFCAP_POLLING)
1375                 igb_disable_intr(adapter);
1376         else
1377 #endif /* DEVICE_POLLING */
1378         {
1379                 igb_enable_intr(adapter);
1380                 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1381         }
1382
1383         /* Set Energy Efficient Ethernet */
1384         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1385                 if (adapter->hw.mac.type == e1000_i354)
1386                         e1000_set_eee_i354(&adapter->hw);
1387                 else
1388                         e1000_set_eee_i350(&adapter->hw);
1389         }
1390 }
1391
1392 static void
1393 igb_init(void *arg)
1394 {
1395         struct adapter *adapter = arg;
1396
1397         IGB_CORE_LOCK(adapter);
1398         igb_init_locked(adapter);
1399         IGB_CORE_UNLOCK(adapter);
1400 }
1401
1402
1403 static void
1404 igb_handle_que(void *context, int pending)
1405 {
1406         struct igb_queue *que = context;
1407         struct adapter *adapter = que->adapter;
1408         struct tx_ring *txr = que->txr;
1409         struct ifnet    *ifp = adapter->ifp;
1410
1411         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1412                 bool    more;
1413
1414                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1415
1416                 IGB_TX_LOCK(txr);
1417                 igb_txeof(txr);
1418 #ifndef IGB_LEGACY_TX
1419                 /* Process the stack queue only if not depleted */
1420                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1421                     !drbr_empty(ifp, txr->br))
1422                         igb_mq_start_locked(ifp, txr);
1423 #else
1424                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1425                         igb_start_locked(txr, ifp);
1426 #endif
1427                 IGB_TX_UNLOCK(txr);
1428                 /* Do we need another? */
1429                 if (more) {
1430                         taskqueue_enqueue(que->tq, &que->que_task);
1431                         return;
1432                 }
1433         }
1434
1435 #ifdef DEVICE_POLLING
1436         if (ifp->if_capenable & IFCAP_POLLING)
1437                 return;
1438 #endif
1439         /*
        ** Reenable this interrupt: que->eims is only set when MSI-X
        ** is in use; otherwise unmask the shared legacy/MSI interrupt.
        */
1440         if (que->eims)
1441                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1442         else
1443                 igb_enable_intr(adapter);
1444 }
1445
1446 /* Deal with link in a sleepable context */
1447 static void
1448 igb_handle_link(void *context, int pending)
1449 {
1450         struct adapter *adapter = context;
1451
1452         IGB_CORE_LOCK(adapter);
1453         igb_handle_link_locked(adapter);
1454         IGB_CORE_UNLOCK(adapter);
1455 }
1456
1457 static void
1458 igb_handle_link_locked(struct adapter *adapter)
1459 {
1460         struct tx_ring  *txr = adapter->tx_rings;
1461         struct ifnet *ifp = adapter->ifp;
1462
1463         IGB_CORE_LOCK_ASSERT(adapter);
1464         adapter->hw.mac.get_link_status = 1;
1465         igb_update_link_status(adapter);
1466         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1467                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1468                         IGB_TX_LOCK(txr);
1469 #ifndef IGB_LEGACY_TX
1470                         /* Process the stack queue only if not depleted */
1471                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1472                             !drbr_empty(ifp, txr->br))
1473                                 igb_mq_start_locked(ifp, txr);
1474 #else
1475                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1476                                 igb_start_locked(txr, ifp);
1477 #endif
1478                         IGB_TX_UNLOCK(txr);
1479                 }
1480         }
1481 }
1482
1483 /*********************************************************************
1484  *
1485  *  MSI/Legacy Deferred
1486  *  Interrupt Service routine  
1487  *
1488  *********************************************************************/
1489 static int
1490 igb_irq_fast(void *arg)
1491 {
1492         struct adapter          *adapter = arg;
1493         struct igb_queue        *que = adapter->queues;
1494         u32                     reg_icr;
1495
1496
1497         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1498
1499         /* Hot eject?  */
1500         if (reg_icr == 0xffffffff)
1501                 return FILTER_STRAY;
1502
1503         /* Definitely not our interrupt.  */
1504         if (reg_icr == 0x0)
1505                 return FILTER_STRAY;
1506
1507         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1508                 return FILTER_STRAY;
1509
1510         /*
1511          * Mask interrupts until the taskqueue is finished running.  This is
1512          * cheap, just assume that it is needed.  This also works around the
1513          * MSI message reordering errata on certain systems.
1514          */
1515         igb_disable_intr(adapter);
1516         taskqueue_enqueue(que->tq, &que->que_task);
1517
1518         /* Link status change */
1519         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1520                 taskqueue_enqueue(que->tq, &adapter->link_task);
1521
1522         if (reg_icr & E1000_ICR_RXO)
1523                 adapter->rx_overruns++;
1524         return FILTER_HANDLED;
1525 }
1526
1527 #ifdef DEVICE_POLLING
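/*
 * FreeBSD 8 changed the polling handler ABI to return the number of
 * packets processed; POLL_RETURN_COUNT hides the difference so one
 * function body compiles on both.
 */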
1528 #if __FreeBSD_version >= 800000
1529 #define POLL_RETURN_COUNT(a) (a)
1530 static int
1531 #else
1532 #define POLL_RETURN_COUNT(a)
1533 static void
1534 #endif
1535 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1536 {
1537         struct adapter          *adapter = ifp->if_softc;
1538         struct igb_queue        *que;
1539         struct tx_ring          *txr;
1540         u32                     reg_icr, rx_done = 0;
1541         u32                     loop = IGB_MAX_LOOP;
1542         bool                    more;
1543
1544         IGB_CORE_LOCK(adapter);
1545         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1546                 IGB_CORE_UNLOCK(adapter);
1547                 return POLL_RETURN_COUNT(rx_done);
1548         }
1549
1550         if (cmd == POLL_AND_CHECK_STATUS) {
1551                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1552                 /* Link status change */
1553                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1554                         igb_handle_link_locked(adapter);
1555
1556                 if (reg_icr & E1000_ICR_RXO)
1557                         adapter->rx_overruns++;
1558         }
1559         IGB_CORE_UNLOCK(adapter);
1560
1561         for (int i = 0; i < adapter->num_queues; i++) {
1562                 que = &adapter->queues[i];
1563                 txr = que->txr;
1564
1565                 igb_rxeof(que, count, &rx_done);
1566
1567                 IGB_TX_LOCK(txr);
1568                 do {
1569                         more = igb_txeof(txr);
1570                 } while (loop-- && more);
1571 #ifndef IGB_LEGACY_TX
1572                 if (!drbr_empty(ifp, txr->br))
1573                         igb_mq_start_locked(ifp, txr);
1574 #else
1575                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1576                         igb_start_locked(txr, ifp);
1577 #endif
1578                 IGB_TX_UNLOCK(txr);
1579         }
1580
1581         return POLL_RETURN_COUNT(rx_done);
1582 }
1583 #endif /* DEVICE_POLLING */
1584
1585 /*********************************************************************
1586  *
1587  *  MSIX Que Interrupt Service routine
1588  *
1589  **********************************************************************/
1590 static void
1591 igb_msix_que(void *arg)
1592 {
1593         struct igb_queue *que = arg;
1594         struct adapter *adapter = que->adapter;
1595         struct ifnet   *ifp = adapter->ifp;
1596         struct tx_ring *txr = que->txr;
1597         struct rx_ring *rxr = que->rxr;
1598         u32             newitr = 0;
1599         bool            more_rx;
1600
1601         /* Ignore spurious interrupts */
1602         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1603                 return;
1604
1605         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1606         ++que->irqs;
1607
1608         IGB_TX_LOCK(txr);
1609         igb_txeof(txr);
1610 #ifndef IGB_LEGACY_TX
1611         /* Process the stack queue only if not depleted */
1612         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1613             !drbr_empty(ifp, txr->br))
1614                 igb_mq_start_locked(ifp, txr);
1615 #else
1616         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1617                 igb_start_locked(txr, ifp);
1618 #endif
1619         IGB_TX_UNLOCK(txr);
1620
1621         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1622
1623         if (adapter->enable_aim == FALSE)
1624                 goto no_calc;
1625         /*
1626         ** Do Adaptive Interrupt Moderation:
1627         **  - Write out last calculated setting
1628         **  - Calculate based on average size over
1629         **    the last interval.
1630         */
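        /*
        ** (The net effect: the average frame size seen over the last
        ** interval becomes the next EITR interval, so bulk large-frame
        ** traffic throttles interrupts harder than small-frame traffic.)
        */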
1631         if (que->eitr_setting)
1632                 E1000_WRITE_REG(&adapter->hw,
1633                     E1000_EITR(que->msix), que->eitr_setting);
1634  
1635         que->eitr_setting = 0;
1636
1637         /* Idle, do nothing */
1638         if ((txr->bytes == 0) && (rxr->bytes == 0))
1639                 goto no_calc;
1640                                 
1641         /* Use half the default if link is below 1Gb/s */
1642         if (adapter->link_speed != 1000)
1643                 newitr = IGB_DEFAULT_ITR / 2;
1644         else {
1645                 if ((txr->bytes) && (txr->packets))
1646                         newitr = txr->bytes/txr->packets;
1647                 if ((rxr->bytes) && (rxr->packets))
1648                         newitr = max(newitr,
1649                             (rxr->bytes / rxr->packets));
1650                 newitr += 24; /* account for hardware frame, crc */
1651                 /* set an upper boundary */
1652                 newitr = min(newitr, 3000);
1653                 /* Be nice to the mid range */
1654                 if ((newitr > 300) && (newitr < 1200))
1655                         newitr = (newitr / 3);
1656                 else
1657                         newitr = (newitr / 2);
1658         }
1659         newitr &= 0x7FFC;  /* Mask invalid bits */
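        /*
        ** The 82575 expects the interval replicated in both halves
        ** of EITR; later MACs instead take CNT_IGNR so the write
        ** takes effect without waiting out the current interval.
        */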
1660         if (adapter->hw.mac.type == e1000_82575)
1661                 newitr |= newitr << 16;
1662         else
1663                 newitr |= E1000_EITR_CNT_IGNR;
1664                  
1665         /* save for next interrupt */
1666         que->eitr_setting = newitr;
1667
1668         /* Reset state */
1669         txr->bytes = 0;
1670         txr->packets = 0;
1671         rxr->bytes = 0;
1672         rxr->packets = 0;
1673
1674 no_calc:
1675         /* Schedule a clean task if needed */
1676         if (more_rx)
1677                 taskqueue_enqueue(que->tq, &que->que_task);
1678         else
1679                 /* Reenable this interrupt */
1680                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1681         return;
1682 }
1683
1684
1685 /*********************************************************************
1686  *
1687  *  MSIX Link Interrupt Service routine
1688  *
1689  **********************************************************************/
1690
1691 static void
1692 igb_msix_link(void *arg)
1693 {
1694         struct adapter  *adapter = arg;
1695         u32             icr;
1696
1697         ++adapter->link_irq;
1698         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1699         if (!(icr & E1000_ICR_LSC))
1700                 goto spurious;
1701         igb_handle_link(adapter, 0);
1702
1703 spurious:
1704         /* Rearm */
1705         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1706         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1707         return;
1708 }
1709
1710
1711 /*********************************************************************
1712  *
1713  *  Media Ioctl callback
1714  *
1715  *  This routine is called whenever the user queries the status of
1716  *  the interface using ifconfig.
1717  *
1718  **********************************************************************/
1719 static void
1720 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1721 {
1722         struct adapter *adapter = ifp->if_softc;
1723
1724         INIT_DEBUGOUT("igb_media_status: begin");
1725
1726         IGB_CORE_LOCK(adapter);
1727         igb_update_link_status(adapter);
1728
1729         ifmr->ifm_status = IFM_AVALID;
1730         ifmr->ifm_active = IFM_ETHER;
1731
1732         if (!adapter->link_active) {
1733                 IGB_CORE_UNLOCK(adapter);
1734                 return;
1735         }
1736
1737         ifmr->ifm_status |= IFM_ACTIVE;
1738
1739         switch (adapter->link_speed) {
1740         case 10:
1741                 ifmr->ifm_active |= IFM_10_T;
1742                 break;
1743         case 100:
1744                 /*
1745                 ** Support for 100Mb SFP - these are Fiber 
1746                 ** but the media type appears as serdes
1747                 */
1748                 if (adapter->hw.phy.media_type ==
1749                     e1000_media_type_internal_serdes)
1750                         ifmr->ifm_active |= IFM_100_FX;
1751                 else
1752                         ifmr->ifm_active |= IFM_100_TX;
1753                 break;
1754         case 1000:
1755                 ifmr->ifm_active |= IFM_1000_T;
1756                 break;
1757         case 2500:
1758                 ifmr->ifm_active |= IFM_2500_SX;
1759                 break;
1760         }
1761
1762         if (adapter->link_duplex == FULL_DUPLEX)
1763                 ifmr->ifm_active |= IFM_FDX;
1764         else
1765                 ifmr->ifm_active |= IFM_HDX;
1766
1767         IGB_CORE_UNLOCK(adapter);
1768 }
1769
1770 /*********************************************************************
1771  *
1772  *  Media Ioctl callback
1773  *
1774  *  This routine is called when the user changes speed/duplex using
1775  *  media/mediaopt option with ifconfig.
1776  *
1777  **********************************************************************/
1778 static int
1779 igb_media_change(struct ifnet *ifp)
1780 {
1781         struct adapter *adapter = ifp->if_softc;
1782         struct ifmedia  *ifm = &adapter->media;
1783
1784         INIT_DEBUGOUT("igb_media_change: begin");
1785
1786         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1787                 return (EINVAL);
1788
1789         IGB_CORE_LOCK(adapter);
1790         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1791         case IFM_AUTO:
1792                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1793                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1794                 break;
1795         case IFM_1000_LX:
1796         case IFM_1000_SX:
1797         case IFM_1000_T:
1798                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1799                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1800                 break;
1801         case IFM_100_TX:
1802                 adapter->hw.mac.autoneg = FALSE;
1803                 adapter->hw.phy.autoneg_advertised = 0;
1804                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1805                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1806                 else
1807                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1808                 break;
1809         case IFM_10_T:
1810                 adapter->hw.mac.autoneg = FALSE;
1811                 adapter->hw.phy.autoneg_advertised = 0;
1812                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1813                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1814                 else
1815                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1816                 break;
1817         default:
1818                 device_printf(adapter->dev, "Unsupported media type\n");
1819         }
1820
1821         igb_init_locked(adapter);
1822         IGB_CORE_UNLOCK(adapter);
1823
1824         return (0);
1825 }
1826
1827
1828 /*********************************************************************
1829  *
1830  *  This routine maps the mbufs to Advanced TX descriptors.
1831  *  
1832  **********************************************************************/
1833 static int
1834 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1835 {
1836         struct adapter  *adapter = txr->adapter;
1837         u32             olinfo_status = 0, cmd_type_len;
1838         int             i, j, error, nsegs;
1839         int             first;
1840         bool            remap = TRUE;
1841         struct mbuf     *m_head;
1842         bus_dma_segment_t segs[IGB_MAX_SCATTER];
1843         bus_dmamap_t    map;
1844         struct igb_tx_buf *txbuf;
1845         union e1000_adv_tx_desc *txd = NULL;
1846
1847         m_head = *m_headp;
1848
1849         /* Basic descriptor defines */
1850         cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1851             E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1852
1853         if (m_head->m_flags & M_VLANTAG)
1854                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1855
1856         /*
1857          * Important to capture the first descriptor
1858          * used because it will contain the index of
1859          * the one we tell the hardware to report back
1860          */
1861         first = txr->next_avail_desc;
1862         txbuf = &txr->tx_buffers[first];
1863         map = txbuf->map;
1864
1865         /*
1866          * Map the packet for DMA.
1867          */
1868 retry:
1869         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1870             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1871
1872         if (__predict_false(error)) {
1873                 struct mbuf *m;
1874
1875                 switch (error) {
1876                 case EFBIG:
1877                         /* Try it again? - one try */
1878                         if (remap == TRUE) {
1879                                 remap = FALSE;
1880                                 m = m_defrag(*m_headp, M_NOWAIT);
1881                                 if (m == NULL) {
1882                                         adapter->mbuf_defrag_failed++;
1883                                         m_freem(*m_headp);
1884                                         *m_headp = NULL;
1885                                         return (ENOBUFS);
1886                                 }
1887                                 *m_headp = m;
1888                                 goto retry;
1889                         } else
1890                                 return (error);
1891                 case ENOMEM:
1892                         txr->no_tx_dma_setup++;
1893                         return (error);
1894                 default:
1895                         txr->no_tx_dma_setup++;
1896                         m_freem(*m_headp);
1897                         *m_headp = NULL;
1898                         return (error);
1899                 }
1900         }
1901
1902         /* Make certain there are enough descriptors */
1903         if (nsegs > txr->tx_avail - 2) {
1904                 txr->no_desc_avail++;
1905                 bus_dmamap_unload(txr->txtag, map);
1906                 return (ENOBUFS);
1907         }
1908         m_head = *m_headp;
1909
1910         /*
1911         ** Set up the appropriate offload context
1912         ** this will consume the first descriptor
1913         */
1914         error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1915         if (__predict_false(error)) {
1916                 m_freem(*m_headp);
1917                 *m_headp = NULL;
1918                 return (error);
1919         }
1920
1921         /* 82575 needs the queue index added */
1922         if (adapter->hw.mac.type == e1000_82575)
1923                 olinfo_status |= txr->me << 4;
1924
1925         i = txr->next_avail_desc;
1926         for (j = 0; j < nsegs; j++) {
1927                 bus_size_t seglen;
1928                 bus_addr_t segaddr;
1929
1930                 txbuf = &txr->tx_buffers[i];
1931                 txd = &txr->tx_base[i];
1932                 seglen = segs[j].ds_len;
1933                 segaddr = htole64(segs[j].ds_addr);
1934
1935                 txd->read.buffer_addr = segaddr;
1936                 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1937                     cmd_type_len | seglen);
1938                 txd->read.olinfo_status = htole32(olinfo_status);
1939
1940                 if (++i == txr->num_desc)
1941                         i = 0;
1942         }
1943
1944         txd->read.cmd_type_len |=
1945             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1946         txr->tx_avail -= nsegs;
1947         txr->next_avail_desc = i;
1948
1949         txbuf->m_head = m_head;
1950         /*
1951         ** Here we swap the map so the last descriptor,
1952         ** which gets the completion interrupt has the
1953         ** real map, and the first descriptor gets the
1954         ** unused map from this descriptor.
1955         */
1956         txr->tx_buffers[first].map = txbuf->map;
1957         txbuf->map = map;
1958         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1959
1960         /* Set the EOP descriptor that will be marked done */
1961         txbuf = &txr->tx_buffers[first];
1962         txbuf->eop = txd;
1963
1964         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1965             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1966         /*
1967          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1968          * hardware that this frame is available to transmit.
1969          */
1970         ++txr->total_packets;
1971         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1972
1973         return (0);
1974 }
1975
static void
1976 igb_set_promisc(struct adapter *adapter)
1977 {
1978         struct ifnet    *ifp = adapter->ifp;
1979         struct e1000_hw *hw = &adapter->hw;
1980         u32             reg;
1981
1982         if (adapter->vf_ifp) {
1983                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1984                 return;
1985         }
1986
1987         reg = E1000_READ_REG(hw, E1000_RCTL);
1988         if (ifp->if_flags & IFF_PROMISC) {
1989                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1990                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1991         } else if (ifp->if_flags & IFF_ALLMULTI) {
1992                 reg |= E1000_RCTL_MPE;
1993                 reg &= ~E1000_RCTL_UPE;
1994                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1995         }
1996 }
1997
1998 static void
1999 igb_disable_promisc(struct adapter *adapter)
2000 {
2001         struct e1000_hw *hw = &adapter->hw;
2002         struct ifnet    *ifp = adapter->ifp;
2003         u32             reg;
2004         int             mcnt = 0;
2005
2006         if (adapter->vf_ifp) {
2007                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2008                 return;
2009         }
2010         reg = E1000_READ_REG(hw, E1000_RCTL);
2011         reg &= ~E1000_RCTL_UPE;
2012         if (ifp->if_flags & IFF_ALLMULTI)
2013                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2014         else {
2015                 struct  ifmultiaddr *ifma;
2016 #if __FreeBSD_version < 800000
2017                 IF_ADDR_LOCK(ifp);
2018 #else   
2019                 if_maddr_rlock(ifp);
2020 #endif
2021                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2022                         if (ifma->ifma_addr->sa_family != AF_LINK)
2023                                 continue;
2024                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2025                                 break;
2026                         mcnt++;
2027                 }
2028 #if __FreeBSD_version < 800000
2029                 IF_ADDR_UNLOCK(ifp);
2030 #else
2031                 if_maddr_runlock(ifp);
2032 #endif
2033         }
2034         /* Don't disable if in MAX groups */
2035         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2036                 reg &= ~E1000_RCTL_MPE;
2037         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2038 }
2039
2040
2041 /*********************************************************************
2042  *  Multicast Update
2043  *
2044  *  This routine is called whenever multicast address list is updated.
2045  *
2046  **********************************************************************/
2047
2048 static void
2049 igb_set_multi(struct adapter *adapter)
2050 {
2051         struct ifnet    *ifp = adapter->ifp;
2052         struct ifmultiaddr *ifma;
2053         u32 reg_rctl = 0;
2054         u8  *mta;
2055
2056         int mcnt = 0;
2057
2058         IOCTL_DEBUGOUT("igb_set_multi: begin");
2059
2060         mta = adapter->mta;
2061         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2062             MAX_NUM_MULTICAST_ADDRESSES);
2063
2064 #if __FreeBSD_version < 800000
2065         IF_ADDR_LOCK(ifp);
2066 #else
2067         if_maddr_rlock(ifp);
2068 #endif
2069         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2070                 if (ifma->ifma_addr->sa_family != AF_LINK)
2071                         continue;
2072
2073                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2074                         break;
2075
2076                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2077                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2078                 mcnt++;
2079         }
2080 #if __FreeBSD_version < 800000
2081         IF_ADDR_UNLOCK(ifp);
2082 #else
2083         if_maddr_runlock(ifp);
2084 #endif
2085
2086         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2087                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2088                 reg_rctl |= E1000_RCTL_MPE;
2089                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2090         } else
2091                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2092 }
2093
2094
2095 /*********************************************************************
2096  *  Timer routine:
2097  *      This routine checks for link status,
2098  *      updates statistics, and does the watchdog.
2099  *
2100  **********************************************************************/
2101
2102 static void
2103 igb_local_timer(void *arg)
2104 {
2105         struct adapter          *adapter = arg;
2106         device_t                dev = adapter->dev;
2107         struct ifnet            *ifp = adapter->ifp;
2108         struct tx_ring          *txr = adapter->tx_rings;
2109         struct igb_queue        *que = adapter->queues;
2110         int                     hung = 0, busy = 0;
2111
2112
2113         IGB_CORE_LOCK_ASSERT(adapter);
2114
2115         igb_update_link_status(adapter);
2116         igb_update_stats_counters(adapter);
2117
2118         /*
2119         ** Check the TX queues status
2120         **      - central locked handling of OACTIVE
2121         **      - watchdog only if all queues show hung
2122         */
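        /*
        ** (A queue only counts toward the hang check when no pause
        ** frames arrived in the interval: flow-control backpressure
        ** can stall transmit without being a true hang.)
        */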
2123         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2124                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2125                     (adapter->pause_frames == 0))
2126                         ++hung;
2127                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2128                         ++busy;
2129                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2130                         taskqueue_enqueue(que->tq, &que->que_task);
2131         }
2132         if (hung == adapter->num_queues)
2133                 goto timeout;
2134         if (busy == adapter->num_queues)
2135                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2136         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2137             (busy < adapter->num_queues))
2138                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2139
2140         adapter->pause_frames = 0;
2141         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2142 #ifndef DEVICE_POLLING
2143         /* Schedule all queue interrupts - deadlock protection */
2144         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2145 #endif
2146         return;
2147
2148 timeout:
2149         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2150         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2151             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2152             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2153         device_printf(dev, "TX(%d) desc avail = %d, "
2154             "Next TX to Clean = %d\n",
2155             txr->me, txr->tx_avail, txr->next_to_clean);
2156         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2157         adapter->watchdog_events++;
2158         igb_init_locked(adapter);
2159 }
2160
2161 static void
2162 igb_update_link_status(struct adapter *adapter)
2163 {
2164         struct e1000_hw         *hw = &adapter->hw;
2165         struct e1000_fc_info    *fc = &hw->fc;
2166         struct ifnet            *ifp = adapter->ifp;
2167         device_t                dev = adapter->dev;
2168         struct tx_ring          *txr = adapter->tx_rings;
2169         u32                     link_check, thstat, ctrl;
2170         char                    *flowctl = NULL;
2171
2172         link_check = thstat = ctrl = 0;
2173
2174         /* Get the cached link value or read for real */
2175         switch (hw->phy.media_type) {
2176         case e1000_media_type_copper:
2177                 if (hw->mac.get_link_status) {
2178                         /* Do the work to read phy */
2179                         e1000_check_for_link(hw);
2180                         link_check = !hw->mac.get_link_status;
2181                 } else
2182                         link_check = TRUE;
2183                 break;
2184         case e1000_media_type_fiber:
2185                 e1000_check_for_link(hw);
2186                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2187                                  E1000_STATUS_LU);
2188                 break;
2189         case e1000_media_type_internal_serdes:
2190                 e1000_check_for_link(hw);
2191                 link_check = adapter->hw.mac.serdes_has_link;
2192                 break;
2193         /* VF device is type_unknown */
2194         case e1000_media_type_unknown:
2195                 e1000_check_for_link(hw);
2196                 link_check = !hw->mac.get_link_status;
2197                 /* Fall thru */
2198         default:
2199                 break;
2200         }
2201
2202         /* Check for thermal downshift or shutdown */
2203         if (hw->mac.type == e1000_i350) {
2204                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2205                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2206         }
2207
2208         /* Get the flow control for display */
2209         switch (fc->current_mode) {
2210         case e1000_fc_rx_pause:
2211                 flowctl = "RX";
2212                 break;  
2213         case e1000_fc_tx_pause:
2214                 flowctl = "TX";
2215                 break;  
2216         case e1000_fc_full:
2217                 flowctl = "Full";
2218                 break;  
2219         case e1000_fc_none:
2220         default:
2221                 flowctl = "None";
2222                 break;  
2223         }
2224
2225         /* Now we check if a transition has happened */
2226         if (link_check && (adapter->link_active == 0)) {
2227                 e1000_get_speed_and_duplex(&adapter->hw, 
2228                     &adapter->link_speed, &adapter->link_duplex);
2229                 if (bootverbose)
2230                         device_printf(dev, "Link is up %d Mbps %s,"
2231                             " Flow Control: %s\n",
2232                             adapter->link_speed,
2233                             ((adapter->link_duplex == FULL_DUPLEX) ?
2234                             "Full Duplex" : "Half Duplex"), flowctl);
2235                 adapter->link_active = 1;
                /* the cast avoids 32-bit int overflow at 2500Mb */
2236                 ifp->if_baudrate = (u_long)adapter->link_speed * 1000000;
2237                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2238                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2239                         device_printf(dev, "Link: thermal downshift\n");
2240                 /* Delay Link Up for Phy update */
2241                 if (((hw->mac.type == e1000_i210) ||
2242                     (hw->mac.type == e1000_i211)) &&
2243                     (hw->phy.id == I210_I_PHY_ID))
2244                         msec_delay(I210_LINK_DELAY);
2245                 /* Reset if the media type changed. */
2246                 if (hw->dev_spec._82575.media_changed) {
2247                         hw->dev_spec._82575.media_changed = false;
2248                         adapter->flags |= IGB_MEDIA_RESET;
2249                         igb_reset(adapter);
2250                 }       
2251                 /* This can sleep */
2252                 if_link_state_change(ifp, LINK_STATE_UP);
2253         } else if (!link_check && (adapter->link_active == 1)) {
2254                 ifp->if_baudrate = adapter->link_speed = 0;
2255                 adapter->link_duplex = 0;
2256                 if (bootverbose)
2257                         device_printf(dev, "Link is Down\n");
2258                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2259                     (thstat & E1000_THSTAT_PWR_DOWN))
2260                         device_printf(dev, "Link: thermal shutdown\n");
2261                 adapter->link_active = 0;
2262                 /* This can sleep */
2263                 if_link_state_change(ifp, LINK_STATE_DOWN);
2264                 /* Reset queue state */
2265                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2266                         txr->queue_status = IGB_QUEUE_IDLE;
2267         }
2268 }
2269
2270 /*********************************************************************
2271  *
2272  *  This routine disables all traffic on the adapter by issuing a
2273  *  global reset on the MAC and deallocates TX/RX buffers.
2274  *
2275  **********************************************************************/
2276
2277 static void
2278 igb_stop(void *arg)
2279 {
2280         struct adapter  *adapter = arg;
2281         struct ifnet    *ifp = adapter->ifp;
2282         struct tx_ring *txr = adapter->tx_rings;
2283
2284         IGB_CORE_LOCK_ASSERT(adapter);
2285
2286         INIT_DEBUGOUT("igb_stop: begin");
2287
2288         igb_disable_intr(adapter);
2289
2290         callout_stop(&adapter->timer);
2291
2292         /* Tell the stack that the interface is no longer active */
2293         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2294         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2295
2296         /* Disarm watchdog timer. */
2297         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2298                 IGB_TX_LOCK(txr);
2299                 txr->queue_status = IGB_QUEUE_IDLE;
2300                 IGB_TX_UNLOCK(txr);
2301         }
2302
2303         e1000_reset_hw(&adapter->hw);
2304         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2305
2306         e1000_led_off(&adapter->hw);
2307         e1000_cleanup_led(&adapter->hw);
2308 }
2309
2310
2311 /*********************************************************************
2312  *
2313  *  Determine hardware revision.
2314  *
2315  **********************************************************************/
2316 static void
2317 igb_identify_hardware(struct adapter *adapter)
2318 {
2319         device_t dev = adapter->dev;
2320
2321         /* Make sure our PCI config space has the necessary stuff set */
2322         pci_enable_busmaster(dev);
2323         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2324
2325         /* Save off the information about this board */
2326         adapter->hw.vendor_id = pci_get_vendor(dev);
2327         adapter->hw.device_id = pci_get_device(dev);
2328         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2329         adapter->hw.subsystem_vendor_id =
2330             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2331         adapter->hw.subsystem_device_id =
2332             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2333
2334         /* Set MAC type early for PCI setup */
2335         e1000_set_mac_type(&adapter->hw);
2336
2337         /* Are we a VF device? */
2338         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2339             (adapter->hw.mac.type == e1000_vfadapt_i350))
2340                 adapter->vf_ifp = 1;
2341         else
2342                 adapter->vf_ifp = 0;
2343 }
2344
2345 static int
2346 igb_allocate_pci_resources(struct adapter *adapter)
2347 {
2348         device_t        dev = adapter->dev;
2349         int             rid;
2350
2351         rid = PCIR_BAR(0);
2352         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2353             &rid, RF_ACTIVE);
2354         if (adapter->pci_mem == NULL) {
2355                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2356                 return (ENXIO);
2357         }
2358         adapter->osdep.mem_bus_space_tag =
2359             rman_get_bustag(adapter->pci_mem);
2360         adapter->osdep.mem_bus_space_handle =
2361             rman_get_bushandle(adapter->pci_mem);
2362         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2363
2364         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2365
2366         /* This will setup either MSI/X or MSI */
2367         adapter->msix = igb_setup_msix(adapter);
2368         adapter->hw.back = &adapter->osdep;
2369
2370         return (0);
2371 }
2372
2373 /*********************************************************************
2374  *
2375  *  Setup the Legacy or MSI Interrupt handler
2376  *
2377  **********************************************************************/
2378 static int
2379 igb_allocate_legacy(struct adapter *adapter)
2380 {
2381         device_t                dev = adapter->dev;
2382         struct igb_queue        *que = adapter->queues;
2383 #ifndef IGB_LEGACY_TX
2384         struct tx_ring          *txr = adapter->tx_rings;
2385 #endif
2386         int                     error, rid = 0;
2387
2388         /* Turn off all interrupts */
2389         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2390
2391         /* MSI RID is 1 */
2392         if (adapter->msix == 1)
2393                 rid = 1;
2394
2395         /* We allocate a single interrupt resource */
2396         adapter->res = bus_alloc_resource_any(dev,
2397             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2398         if (adapter->res == NULL) {
2399                 device_printf(dev, "Unable to allocate bus resource: "
2400                     "interrupt\n");
2401                 return (ENXIO);
2402         }
2403
2404 #ifndef IGB_LEGACY_TX
2405         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2406 #endif
2407
2408         /*
2409          * Try allocating a fast interrupt and the associated deferred
2410          * processing contexts.
2411          */
2412         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2413         /* Make tasklet for deferred link handling */
2414         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2415         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2416             taskqueue_thread_enqueue, &que->tq);
2417         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2418             device_get_nameunit(adapter->dev));
2419         if ((error = bus_setup_intr(dev, adapter->res,
2420             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2421             adapter, &adapter->tag)) != 0) {
2422                 device_printf(dev, "Failed to register fast interrupt "
2423                             "handler: %d\n", error);
2424                 taskqueue_free(que->tq);
2425                 que->tq = NULL;
2426                 return (error);
2427         }
2428
2429         return (0);
2430 }
2431
2432
2433 /*********************************************************************
2434  *
2435  *  Setup the MSIX Queue Interrupt handlers: 
2436  *
2437  **********************************************************************/
2438 static int
2439 igb_allocate_msix(struct adapter *adapter)
2440 {
2441         device_t                dev = adapter->dev;
2442         struct igb_queue        *que = adapter->queues;
2443         int                     error, rid, vector = 0;
2444
2445         /* Be sure to start with all interrupts disabled */
2446         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2447         E1000_WRITE_FLUSH(&adapter->hw);
2448
2449         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2450                 rid = vector + 1;
2451                 que->res = bus_alloc_resource_any(dev,
2452                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2453                 if (que->res == NULL) {
2454                         device_printf(dev,
2455                             "Unable to allocate bus resource: "
2456                             "MSIX Queue Interrupt\n");
2457                         return (ENXIO);
2458                 }
2459                 error = bus_setup_intr(dev, que->res,
2460                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2461                     igb_msix_que, que, &que->tag);
2462                 if (error) {
2463                         que->res = NULL;
2464                         device_printf(dev, "Failed to register Queue handler\n");
2465                         return (error);
2466                 }
2467 #if __FreeBSD_version >= 800504
2468                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2469 #endif
2470                 que->msix = vector;
2471                 if (adapter->hw.mac.type == e1000_82575)
2472                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2473                 else
2474                         que->eims = 1 << vector;
2475                 /*
2476                 ** Bind the msix vector, and thus the
2477                 ** rings to the corresponding cpu.
2478                 */
2479                 if (adapter->num_queues > 1) {
2480                         if (igb_last_bind_cpu < 0)
2481                                 igb_last_bind_cpu = CPU_FIRST();
2482                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2483                         device_printf(dev,
2484                                 "Bound queue %d to cpu %d\n",
2485                                 i, igb_last_bind_cpu);
2486                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2487                 }
2488 #ifndef IGB_LEGACY_TX
2489                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2490                     que->txr);
2491 #endif
2492                 /* Make tasklet for deferred handling */
2493                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2494                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2495                     taskqueue_thread_enqueue, &que->tq);
2496                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2497                     device_get_nameunit(adapter->dev));
2498         }
2499
2500         /* And Link */
2501         rid = vector + 1;
2502         adapter->res = bus_alloc_resource_any(dev,
2503             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2504         if (adapter->res == NULL) {
2505                 device_printf(dev,
2506                     "Unable to allocate bus resource: "
2507                     "MSIX Link Interrupt\n");
2508                 return (ENXIO);
2509         }
2510         if ((error = bus_setup_intr(dev, adapter->res,
2511             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2512             igb_msix_link, adapter, &adapter->tag)) != 0) {
2513                 device_printf(dev, "Failed to register Link handler\n");
2514                 return (error);
2515         }
2516 #if __FreeBSD_version >= 800504
2517         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2518 #endif
2519         adapter->linkvec = vector;
2520
2521         return (0);
2522 }
2523
2524
2525 static void
2526 igb_configure_queues(struct adapter *adapter)
2527 {
2528         struct  e1000_hw        *hw = &adapter->hw;
2529         struct  igb_queue       *que;
2530         u32                     tmp, ivar = 0, newitr = 0;
2531
2532         /* First turn on RSS capability */
2533         if (adapter->hw.mac.type != e1000_82575)
2534                 E1000_WRITE_REG(hw, E1000_GPIE,
2535                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2536                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2537
2538         /* Turn on MSIX */
2539         switch (adapter->hw.mac.type) {
2540         case e1000_82580:
2541         case e1000_i350:
2542         case e1000_i354:
2543         case e1000_i210:
2544         case e1000_i211:
2545         case e1000_vfadapt:
2546         case e1000_vfadapt_i350:
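                /*
                ** Each 32-bit IVAR register covers a queue pair: the
                ** even queue uses bits 7:0 (RX) and 15:8 (TX), the odd
                ** queue bits 23:16 (RX) and 31:24 (TX).
                */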
2547                 /* RX entries */
2548                 for (int i = 0; i < adapter->num_queues; i++) {
2549                         u32 index = i >> 1;
2550                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2551                         que = &adapter->queues[i];
2552                         if (i & 1) {
2553                                 ivar &= 0xFF00FFFF;
2554                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2555                         } else {
2556                                 ivar &= 0xFFFFFF00;
2557                                 ivar |= que->msix | E1000_IVAR_VALID;
2558                         }
2559                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2560                 }
2561                 /* TX entries */
2562                 for (int i = 0; i < adapter->num_queues; i++) {
2563                         u32 index = i >> 1;
2564                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2565                         que = &adapter->queues[i];
2566                         if (i & 1) {
2567                                 ivar &= 0x00FFFFFF;
2568                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2569                         } else {
2570                                 ivar &= 0xFFFF00FF;
2571                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2572                         }
2573                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2574                         adapter->que_mask |= que->eims;
2575                 }
2576
2577                 /* And for the link interrupt */
2578                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2579                 adapter->link_mask = 1 << adapter->linkvec;
2580                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2581                 break;
2582         case e1000_82576:
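                /*
                ** The 82576 lays IVAR out differently: queues 0-7 use
                ** the low half of IVAR0-7 (RX bits 7:0, TX bits 15:8)
                ** and queues 8-15 the high half (RX bits 23:16, TX
                ** bits 31:24).
                */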
2583                 /* RX entries */
2584                 for (int i = 0; i < adapter->num_queues; i++) {
2585                         u32 index = i & 0x7; /* Each IVAR has two entries */
2586                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2587                         que = &adapter->queues[i];
2588                         if (i < 8) {
2589                                 ivar &= 0xFFFFFF00;
2590                                 ivar |= que->msix | E1000_IVAR_VALID;
2591                         } else {
2592                                 ivar &= 0xFF00FFFF;
2593                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2594                         }
2595                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2596                         adapter->que_mask |= que->eims;
2597                 }
2598                 /* TX entries */
2599                 for (int i = 0; i < adapter->num_queues; i++) {
2600                         u32 index = i & 0x7; /* Each IVAR has two entries */
2601                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2602                         que = &adapter->queues[i];
2603                         if (i < 8) {
2604                                 ivar &= 0xFFFF00FF;
2605                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2606                         } else {
2607                                 ivar &= 0x00FFFFFF;
2608                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2609                         }
2610                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2611                         adapter->que_mask |= que->eims;
2612                 }
2613
2614                 /* And for the link interrupt */
2615                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2616                 adapter->link_mask = 1 << adapter->linkvec;
2617                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2618                 break;
2619
2620         case e1000_82575:
2621                 /* Enable MSI-X support */
2622                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2623                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2624                 /* Auto-Mask interrupts upon ICR read. */
2625                 tmp |= E1000_CTRL_EXT_EIAME;
2626                 tmp |= E1000_CTRL_EXT_IRCA;
2627                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2628
2629                 /* Queues */
2630                 for (int i = 0; i < adapter->num_queues; i++) {
2631                         que = &adapter->queues[i];
2632                         tmp = E1000_EICR_RX_QUEUE0 << i;
2633                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2634                         que->eims = tmp;
2635                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2636                             i, que->eims);
2637                         adapter->que_mask |= que->eims;
2638                 }
2639
2640                 /* Link */
2641                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2642                     E1000_EIMS_OTHER);
2643                 adapter->link_mask |= E1000_EIMS_OTHER;
                break;
2644         default:
2645                 break;
2646         }
2647
2648         /* Set the starting interrupt rate */
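        /*
        ** (Assuming the ~250ns EITR granularity of this family, i.e.
        ** 4,000,000 ticks per second, 4000000 / rate yields the
        ** interval for the requested maximum interrupt rate.)
        */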
2649         if (igb_max_interrupt_rate > 0)
2650                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2651
2652         if (hw->mac.type == e1000_82575)
2653                 newitr |= newitr << 16;
2654         else
2655                 newitr |= E1000_EITR_CNT_IGNR;
2656
2657         for (int i = 0; i < adapter->num_queues; i++) {
2658                 que = &adapter->queues[i];
2659                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2660         }
2661
2662         return;
2663 }
2664
2665
2666 static void
2667 igb_free_pci_resources(struct adapter *adapter)
2668 {
2669         struct          igb_queue *que = adapter->queues;
2670         device_t        dev = adapter->dev;
2671         int             rid;
2672
2673         /*
2674         ** There is a slight possibility of a failure mode
2675         ** in attach that will result in entering this function
2676         ** before interrupt resources have been initialized, and
2677         ** in that case we do not want to execute the loops below
2678         ** We can detect this reliably by the state of the adapter
2679         ** res pointer.
2680         */
2681         if (adapter->res == NULL)
2682                 goto mem;
2683
2684         /*
2685          * First release all the interrupt resources:
2686          */
2687         for (int i = 0; i < adapter->num_queues; i++, que++) {
2688                 rid = que->msix + 1;
2689                 if (que->tag != NULL) {
2690                         bus_teardown_intr(dev, que->res, que->tag);
2691                         que->tag = NULL;
2692                 }
2693                 if (que->res != NULL)
2694                         bus_release_resource(dev,
2695                             SYS_RES_IRQ, rid, que->res);
2696         }
2697
2698         /* Clean the Legacy or Link interrupt last */
2699         if (adapter->linkvec) /* we are doing MSIX */
2700                 rid = adapter->linkvec + 1;
2701         else
2702                 rid = (adapter->msix != 0) ? 1 : 0;
2703
2704         que = adapter->queues;
2705         if (adapter->tag != NULL) {
2706                 taskqueue_drain(que->tq, &adapter->link_task);
2707                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2708                 adapter->tag = NULL;
2709         }
2710         if (adapter->res != NULL)
2711                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2712
2713         for (int i = 0; i < adapter->num_queues; i++, que++) {
2714                 if (que->tq != NULL) {
2715 #ifndef IGB_LEGACY_TX
2716                         taskqueue_drain(que->tq, &que->txr->txq_task);
2717 #endif
2718                         taskqueue_drain(que->tq, &que->que_task);
2719                         taskqueue_free(que->tq);
2720                 }
2721         }
2722 mem:
2723         if (adapter->msix)
2724                 pci_release_msi(dev);
2725
2726         if (adapter->msix_mem != NULL)
2727                 bus_release_resource(dev, SYS_RES_MEMORY,
2728                     adapter->memrid, adapter->msix_mem);
2729
2730         if (adapter->pci_mem != NULL)
2731                 bus_release_resource(dev, SYS_RES_MEMORY,
2732                     PCIR_BAR(0), adapter->pci_mem);
2733
2734 }
2735
2736 /*
2737  * Setup Either MSI/X or MSI
2738  */
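/*
 * igb_enable_msix and igb_num_queues are driver globals, normally
 * exposed by the stock driver as the hw.igb.enable_msix and
 * hw.igb.num_queues loader tunables.  Illustrative loader.conf
 * usage (values are examples only):
 *
 *	hw.igb.enable_msix=1
 *	hw.igb.num_queues=4
 */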
2739 static int
2740 igb_setup_msix(struct adapter *adapter)
2741 {
2742         device_t        dev = adapter->dev;
2743         int             bar, want, queues, msgs, maxqueues;
2744
2745         /* tuneable override */
2746         if (igb_enable_msix == 0)
2747                 goto msi;
2748
2749         /* First try MSI/X */
2750         msgs = pci_msix_count(dev); 
2751         if (msgs == 0)
2752                 goto msi;
2753         /*
2754         ** Some new devices, as with ixgbe, now may
2755         ** use a different BAR, so we need to keep
2756         ** track of which is used.
2757         */
2758         adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2759         bar = pci_read_config(dev, adapter->memrid, 4);
2760         if (bar == 0) /* use next bar */
2761                 adapter->memrid += 4;
2762         adapter->msix_mem = bus_alloc_resource_any(dev,
2763             SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2764         if (adapter->msix_mem == NULL) {
2765                 /* May not be enabled */
2766                 device_printf(adapter->dev,
2767                     "Unable to map MSIX table\n");
2768                 goto msi;
2769         }
2770
2771         /* Figure out a reasonable auto config value */
2772         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2773
2774         /* Manual override */
2775         if (igb_num_queues != 0)
2776                 queues = igb_num_queues;
2777
2778         /* Sanity check based on HW */
2779         switch (adapter->hw.mac.type) {
2780                 case e1000_82575:
2781                         maxqueues = 4;
2782                         break;
2783                 case e1000_82576:
2784                 case e1000_82580:
2785                 case e1000_i350:
2786                 case e1000_i354:
2787                         maxqueues = 8;
2788                         break;
2789                 case e1000_i210:
2790                         maxqueues = 4;
2791                         break;
2792                 case e1000_i211:
2793                         maxqueues = 2;
2794                         break;
2795                 default:  /* VF interfaces */
2796                         maxqueues = 1;
2797                         break;
2798         }
2799         if (queues > maxqueues)
2800                 queues = maxqueues;
2801
2802         /* Manual override; reapplied so the tuneable wins over the clamp above */
2803         if (igb_num_queues != 0)
2804                 queues = igb_num_queues;
2805
2806         /*
2807         ** One vector (RX/TX pair) per queue
2808         ** plus an additional for Link interrupt
2809         */
2810         want = queues + 1;
2811         if (msgs >= want)
2812                 msgs = want;
2813         else {
2814                 device_printf(adapter->dev,
2815                     "MSIX Configuration Problem, "
2816                     "%d vectors available, but %d wanted!\n",
2817                     msgs, want);
2818                 goto msi;
2819         }
2820         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2821                 device_printf(adapter->dev,
2822                     "Using MSIX interrupts with %d vectors\n", msgs);
2823                 adapter->num_queues = queues;
2824                 return (msgs);
2825         }
2826         /*
2827         ** If MSIX alloc failed or provided us with
2828         ** less than needed, free and fall through to MSI
2829         */
2830         pci_release_msi(dev);
2831
2832 msi:
2833         if (adapter->msix_mem != NULL) {
2834                 bus_release_resource(dev, SYS_RES_MEMORY,
2835                     adapter->memrid, adapter->msix_mem);
2836                 adapter->msix_mem = NULL;
2837         }
2838         msgs = 1;
2839         if (pci_alloc_msi(dev, &msgs) == 0) {
2840                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2841                 return (msgs);
2842         }
2843         device_printf(adapter->dev, "Using a Legacy interrupt\n");
2844         return (0);
2845 }
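
/*
 * For reference, a usage sketch (not driver code): the MSI-X logic
 * above is steered by loader tunables; the names below are assumed
 * to match the TUNABLE_INT()/sysctl declarations earlier in this
 * file.
 *
 *   # /boot/loader.conf
 *   hw.igb.enable_msix="0"   # skip MSI-X, fall back to MSI/legacy
 *   hw.igb.num_queues="2"    # manually cap the number of queue pairs
 */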
2846
2847 /*********************************************************************
2848  *
2849  *  Initialize the DMA Coalescing feature
2850  *
2851  **********************************************************************/
2852 static void
2853 igb_init_dmac(struct adapter *adapter, u32 pba)
2854 {
2855         device_t        dev = adapter->dev;
2856         struct e1000_hw *hw = &adapter->hw;
2857         u32             dmac, reg;
2858         u16             hwm;
2859
2860         if (hw->mac.type == e1000_i211)
2861                 return;
2862
2863         if (hw->mac.type > e1000_82580) {
2864
2865                 if (adapter->dmac == 0) { /* Disabling it */
                             /* clear only the enable bit */
                             reg = E1000_READ_REG(hw, E1000_DMACR);
                             reg &= ~E1000_DMACR_DMAC_EN;
2866                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2867                         return;
2868                 } else
2869                         device_printf(dev, "DMA Coalescing enabled\n");
2870
2871                 /* Set starting threshold */
2872                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2873
2874                 hwm = 64 * pba - adapter->max_frame_size / 16;
2875                 if (hwm < 64 * (pba - 6))
2876                         hwm = 64 * (pba - 6);
2877                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2878                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2879                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2880                     & E1000_FCRTC_RTH_COAL_MASK);
2881                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2882
2883
2884                 dmac = pba - adapter->max_frame_size / 512;
2885                 if (dmac < pba - 10)
2886                         dmac = pba - 10;
2887                 reg = E1000_READ_REG(hw, E1000_DMACR);
2888                 reg &= ~E1000_DMACR_DMACTHR_MASK;
2889                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2890                     & E1000_DMACR_DMACTHR_MASK);
2891
2892                 /* transition to L0s or L1 if available */
2893                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2894
2895                 /* Check for a 2.5Gb backplane connection before
2896                 * configuring the watchdog timer: the timer counts
2897                 * the msec value in 12.8 usec units on a 2.5Gb
2898                 * link and in 32 usec units on any other
2899                 * connection.
2900                 */
2901                 if (hw->mac.type == e1000_i354) {
2902                         int status = E1000_READ_REG(hw, E1000_STATUS);
2903                         if ((status & E1000_STATUS_2P5_SKU) &&
2904                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2905                                 reg |= ((adapter->dmac * 5) >> 6);
2906                         else
2907                                 reg |= (adapter->dmac >> 5);
2908                 } else {
2909                         reg |= (adapter->dmac >> 5);
2910                 }
2911
2912                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2913
2914 #ifdef I210_OBFF_SUPPORT
2915                 /*
2916                  * Set the OBFF Rx threshold to DMA Coalescing Rx
2917                  * threshold - 2KB and enable the feature in the
2918                  * hardware for I210.
2919                  */
2920                 if (hw->mac.type == e1000_i210) {
2921                         int obff = dmac - 2;
2922                         reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
2923                         reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
2924                         reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
2925                             | E1000_DOBFFCTL_EXIT_ACT_MASK;
2926                         E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
2927                 }
2928 #endif
2929                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2930
2931                 /* Set the interval before transition */
2932                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2933                 if (hw->mac.type == e1000_i350)
2934                         reg |= IGB_DMCTLX_DCFLUSH_DIS;
2935                 /*
2936                 ** On a 2.5Gb connection the TTLX unit is 0.4 usec, so
2937                 ** the same 4 usec delay is 0xA (10 units), not 0x4.
2938                 */
2939                 if (hw->mac.type == e1000_i354) {
2940                         int status = E1000_READ_REG(hw, E1000_STATUS);
2941                         if ((status & E1000_STATUS_2P5_SKU) &&
2942                             (!(status & E1000_STATUS_2P5_SKU_OVER)))
2943                                 reg |= 0xA;
2944                         else
2945                                 reg |= 0x4;
2946                 } else {
2947                         reg |= 0x4;
2948                 }
2949
2950                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2951
2952                 /* free space in tx packet buffer to wake from DMA coal */
2953                 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2954                     (2 * adapter->max_frame_size)) >> 6);
2955
2956                 /* make low power state decision controlled by DMA coal */
2957                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2958                 reg &= ~E1000_PCIEMISC_LX_DECISION;
2959                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2960
2961         } else if (hw->mac.type == e1000_82580) {
2962                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2963                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2964                     reg & ~E1000_PCIEMISC_LX_DECISION);
2965                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2966         }
2967 }
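
/*
 * Worked example (illustrative numbers only): with pba = 34 (KB) and a
 * 1522-byte max frame, the start threshold above comes out as
 * hwm = 64 * 34 - 1522 / 16 = 2176 - 95 = 2081, above the
 * 64 * (34 - 6) = 1792 floor, and the coalescing threshold as
 * dmac = 34 - 1522 / 512 = 34 - 2 = 32, above the 34 - 10 = 24 floor.
 */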
2968
2969
2970 /*********************************************************************
2971  *
2972  *  Set up a fresh starting state
2973  *
2974  **********************************************************************/
2975 static void
2976 igb_reset(struct adapter *adapter)
2977 {
2978         device_t        dev = adapter->dev;
2979         struct e1000_hw *hw = &adapter->hw;
2980         struct e1000_fc_info *fc = &hw->fc;
2981         struct ifnet    *ifp = adapter->ifp;
2982         u32             pba = 0;
2983         u16             hwm;
2984
2985         INIT_DEBUGOUT("igb_reset: begin");
2986
2987         /* Let the firmware know the OS is in control */
2988         igb_get_hw_control(adapter);
2989
2990         /*
2991          * Packet Buffer Allocation (PBA)
2992          * Writing PBA sets the receive portion of the buffer
2993          * the remainder is used for the transmit buffer.
2994          */
2995         switch (hw->mac.type) {
2996         case e1000_82575:
2997                 pba = E1000_PBA_32K;
2998                 break;
2999         case e1000_82576:
3000         case e1000_vfadapt:
3001                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3002                 pba &= E1000_RXPBS_SIZE_MASK_82576;
3003                 break;
3004         case e1000_82580:
3005         case e1000_i350:
3006         case e1000_i354:
3007         case e1000_vfadapt_i350:
3008                 pba = E1000_READ_REG(hw, E1000_RXPBS);
3009                 pba = e1000_rxpbs_adjust_82580(pba);
3010                 break;
3011         case e1000_i210:
3012         case e1000_i211:
3013                 pba = E1000_PBA_34K;
                     break;
3014         default:
3015                 break;
3016         }
3017
3018         /* Special needs in case of Jumbo frames */
3019         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3020                 u32 tx_space, min_tx, min_rx;
3021                 pba = E1000_READ_REG(hw, E1000_PBA);
3022                 tx_space = pba >> 16;
3023                 pba &= 0xffff;
3024                 min_tx = (adapter->max_frame_size +
3025                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3026                 min_tx = roundup2(min_tx, 1024);
3027                 min_tx >>= 10;
3028                 min_rx = adapter->max_frame_size;
3029                 min_rx = roundup2(min_rx, 1024);
3030                 min_rx >>= 10;
3031                 if (tx_space < min_tx &&
3032                     ((min_tx - tx_space) < pba)) {
3033                         pba = pba - (min_tx - tx_space);
3034                         /*
3035                          * if short on rx space, rx wins
3036                          * and must trump tx adjustment
3037                          */
3038                         if (pba < min_rx)
3039                                 pba = min_rx;
3040                 }
3041                 E1000_WRITE_REG(hw, E1000_PBA, pba);
3042         }
3043
3044         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3045
3046         /*
3047          * These parameters control the automatic generation (Tx) and
3048          * response (Rx) to Ethernet PAUSE frames.
3049          * - High water mark should allow for at least two frames to be
3050          *   received after sending an XOFF.
3051          * - Low water mark works best when it is very near the high water mark.
3052          *   This allows the receiver to restart by sending XON when it has
3053          *   drained a bit.
3054          */
3055         hwm = min(((pba << 10) * 9 / 10),
3056             ((pba << 10) - 2 * adapter->max_frame_size));
3057
3058         if (hw->mac.type < e1000_82576) {
3059                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3060                 fc->low_water = fc->high_water - 8;
3061         } else {
3062                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3063                 fc->low_water = fc->high_water - 16;
3064         }
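        /*
         * Worked example (illustrative numbers only): with pba = 34K and
         * a 1522-byte max frame, hwm = min(34816 * 9 / 10, 34816 - 3044)
         * = min(31334, 31772) = 31334; on 82576 and later parts that is
         * truncated to 16-byte granularity, so high_water = 31328 and
         * low_water = 31312.
         */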
3065
3066         fc->pause_time = IGB_FC_PAUSE_TIME;
3067         fc->send_xon = TRUE;
3068         if (adapter->fc)
3069                 fc->requested_mode = adapter->fc;
3070         else
3071                 fc->requested_mode = e1000_fc_default;
3072
3073         /* Issue a global reset */
3074         e1000_reset_hw(hw);
3075         E1000_WRITE_REG(hw, E1000_WUC, 0);
3076
3077         /* Reset for AutoMediaDetect */
3078         if (adapter->flags & IGB_MEDIA_RESET) {
3079                 e1000_setup_init_funcs(hw, TRUE);
3080                 e1000_get_bus_info(hw);
3081                 adapter->flags &= ~IGB_MEDIA_RESET;
3082         }
3083
3084         if (e1000_init_hw(hw) < 0)
3085                 device_printf(dev, "Hardware Initialization Failed\n");
3086
3087         /* Setup DMA Coalescing */
3088         igb_init_dmac(adapter, pba);
3089
3090         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3091         e1000_get_phy_info(hw);
3092         e1000_check_for_link(hw);
3093         return;
3094 }
3095
3096 /*********************************************************************
3097  *
3098  *  Setup networking device structure and register an interface.
3099  *
3100  **********************************************************************/
3101 static int
3102 igb_setup_interface(device_t dev, struct adapter *adapter)
3103 {
3104         struct ifnet   *ifp;
3105
3106         INIT_DEBUGOUT("igb_setup_interface: begin");
3107
3108         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3109         if (ifp == NULL) {
3110                 device_printf(dev, "can not allocate ifnet structure\n");
3111                 return (-1);
3112         }
3113         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3114         ifp->if_init =  igb_init;
3115         ifp->if_softc = adapter;
3116         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3117         ifp->if_ioctl = igb_ioctl;
3118 #ifndef IGB_LEGACY_TX
3119         ifp->if_transmit = igb_mq_start;
3120         ifp->if_qflush = igb_qflush;
3121 #else
3122         ifp->if_start = igb_start;
3123         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3124         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3125         IFQ_SET_READY(&ifp->if_snd);
3126 #endif
3127
3128         ether_ifattach(ifp, adapter->hw.mac.addr);
3129
3130         ifp->if_capabilities = ifp->if_capenable = 0;
3131
3132         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3133         ifp->if_capabilities |= IFCAP_TSO;
3134         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3135         ifp->if_capenable = ifp->if_capabilities;
3136
3137         /* Advertise LRO, but leave it disabled by default */
3138         ifp->if_capabilities |= IFCAP_LRO;
3139
3140 #ifdef DEVICE_POLLING
3141         ifp->if_capabilities |= IFCAP_POLLING;
3142 #endif
3143
3144         /*
3145          * Tell the upper layer(s) we
3146          * support full VLAN capability.
3147          */
3148         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3149         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3150                              |  IFCAP_VLAN_HWTSO
3151                              |  IFCAP_VLAN_MTU;
3152         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3153                           |  IFCAP_VLAN_HWTSO
3154                           |  IFCAP_VLAN_MTU;
3155
3156         /*
3157         ** Don't enable this by default: if vlans are
3158         ** created on another pseudo device (e.g. lagg),
3159         ** vlan events are not passed through, which breaks
3160         ** operation, whereas with HW FILTER off it works.
3161         ** If vlans are used directly on the igb interface,
3162         ** enable this to get full hardware tag filtering.
3163         */
3164         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3165
3166         /*
3167          * Specify the media types supported by this adapter and register
3168          * callbacks to update media and link information
3169          */
3170         ifmedia_init(&adapter->media, IFM_IMASK,
3171             igb_media_change, igb_media_status);
3172         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3173             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3174                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3175                             0, NULL);
3176                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3177         } else {
3178                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3179                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3180                             0, NULL);
3181                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3182                             0, NULL);
3183                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3184                             0, NULL);
3185                 if (adapter->hw.phy.type != e1000_phy_ife) {
3186                         ifmedia_add(&adapter->media,
3187                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3188                         ifmedia_add(&adapter->media,
3189                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3190                 }
3191         }
3192         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3193         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3194         return (0);
3195 }
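
/*
 * Run-time usage sketch (assumes the standard ifconfig(8) capability
 * keywords): the optional capabilities advertised above can be toggled
 * per interface once the driver attaches, e.g.
 *
 *   ifconfig igb0 lro               # opt in to LRO
 *   ifconfig igb0 vlanhwfilter      # hardware VLAN tag filtering
 *   ifconfig igb0 -txcsum -rxcsum   # force software checksums
 */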
3196
3197
3198 /*
3199  * Manage DMA'able memory.
3200  */
3201 static void
3202 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3203 {
3204         if (error)
3205                 return;
3206         *(bus_addr_t *) arg = segs[0].ds_addr;
3207 }
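
/*
 * igb_dma_malloc() below follows the usual three-step busdma recipe:
 * create a tag describing the alignment and size constraints, allocate
 * DMA-able memory against that tag, then load the map so that
 * igb_dmamap_cb() above captures the single segment's bus address in
 * dma->dma_paddr.
 */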
3208
3209 static int
3210 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3211         struct igb_dma_alloc *dma, int mapflags)
3212 {
3213         int error;
3214
3215         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3216                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3217                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3218                                 BUS_SPACE_MAXADDR,      /* highaddr */
3219                                 NULL, NULL,             /* filter, filterarg */
3220                                 size,                   /* maxsize */
3221                                 1,                      /* nsegments */
3222                                 size,                   /* maxsegsize */
3223                                 0,                      /* flags */
3224                                 NULL,                   /* lockfunc */
3225                                 NULL,                   /* lockarg */
3226                                 &dma->dma_tag);
3227         if (error) {
3228                 device_printf(adapter->dev,
3229                     "%s: bus_dma_tag_create failed: %d\n",
3230                     __func__, error);
3231                 goto fail_0;
3232         }
3233
3234         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3235             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3236         if (error) {
3237                 device_printf(adapter->dev,
3238                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3239                     __func__, (uintmax_t)size, error);
3240                 goto fail_2;
3241         }
3242
3243         dma->dma_paddr = 0;
3244         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3245             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3246         if (error || dma->dma_paddr == 0) {
3247                 device_printf(adapter->dev,
3248                     "%s: bus_dmamap_load failed: %d\n",
3249                     __func__, error);
3250                 goto fail_3;
3251         }
3252
3253         return (0);
3254
3255 fail_3:
3256         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3257 fail_2:
3258         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3259         bus_dma_tag_destroy(dma->dma_tag);
3260 fail_0:
3261         dma->dma_map = NULL;
3262         dma->dma_tag = NULL;
3263
3264         return (error);
3265 }
3266
3267 static void
3268 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3269 {
3270         if (dma->dma_tag == NULL)
3271                 return;
3272         if (dma->dma_map != NULL) {
3273                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3274                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3275                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3276                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3277                 dma->dma_map = NULL;
3278         }
3279         bus_dma_tag_destroy(dma->dma_tag);
3280         dma->dma_tag = NULL;
3281 }
3282
3283
3284 /*********************************************************************
3285  *
3286  *  Allocate memory for the transmit and receive rings, and then
3287  *  the descriptors associated with each, called only once at attach.
3288  *
3289  **********************************************************************/
3290 static int
3291 igb_allocate_queues(struct adapter *adapter)
3292 {
3293         device_t dev = adapter->dev;
3294         struct igb_queue        *que = NULL;
3295         struct tx_ring          *txr = NULL;
3296         struct rx_ring          *rxr = NULL;
3297         int rsize, tsize, error = E1000_SUCCESS;
3298         int txconf = 0, rxconf = 0;
3299
3300         /* First allocate the top level queue structs */
3301         if (!(adapter->queues =
3302             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3303             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3304                 device_printf(dev, "Unable to allocate queue memory\n");
3305                 error = ENOMEM;
3306                 goto fail;
3307         }
3308
3309         /* Next allocate the TX ring struct memory */
3310         if (!(adapter->tx_rings =
3311             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3312             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3313                 device_printf(dev, "Unable to allocate TX ring memory\n");
3314                 error = ENOMEM;
3315                 goto tx_fail;
3316         }
3317
3318         /* Now allocate the RX */
3319         if (!(adapter->rx_rings =
3320             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3321             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3322                 device_printf(dev, "Unable to allocate RX ring memory\n");
3323                 error = ENOMEM;
3324                 goto rx_fail;
3325         }
3326
3327         tsize = roundup2(adapter->num_tx_desc *
3328             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3329         /*
3330          * Now set up the TX queues, txconf is needed to handle the
3331          * possibility that things fail midcourse and we need to
3332          * undo memory gracefully
3333          */ 
3334         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3335                 /* Set up some basics */
3336                 txr = &adapter->tx_rings[i];
3337                 txr->adapter = adapter;
3338                 txr->me = i;
3339                 txr->num_desc = adapter->num_tx_desc;
3340
3341                 /* Initialize the TX lock */
3342                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3343                     device_get_nameunit(dev), txr->me);
3344                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3345
3346                 if (igb_dma_malloc(adapter, tsize,
3347                         &txr->txdma, BUS_DMA_NOWAIT)) {
3348                         device_printf(dev,
3349                             "Unable to allocate TX Descriptor memory\n");
3350                         error = ENOMEM;
3351                         goto err_tx_desc;
3352                 }
3353                 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3354                 bzero((void *)txr->tx_base, tsize);
3355
3356                 /* Now allocate transmit buffers for the ring */
3357                 if (igb_allocate_transmit_buffers(txr)) {
3358                         device_printf(dev,
3359                             "Critical Failure setting up transmit buffers\n");
3360                         error = ENOMEM;
3361                         goto err_tx_desc;
3362                 }
3363 #ifndef IGB_LEGACY_TX
3364                 /* Allocate a buf ring */
3365                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3366                     M_WAITOK, &txr->tx_mtx);
3367 #endif
3368         }
3369
3370         /*
3371          * Next the RX queues...
3372          */ 
3373         rsize = roundup2(adapter->num_rx_desc *
3374             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3375         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3376                 rxr = &adapter->rx_rings[i];
3377                 rxr->adapter = adapter;
3378                 rxr->me = i;
3379
3380                 /* Initialize the RX lock */
3381                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3382                     device_get_nameunit(dev), rxr->me);
3383                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3384
3385                 if (igb_dma_malloc(adapter, rsize,
3386                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3387                         device_printf(dev,
3388                             "Unable to allocate RxDescriptor memory\n");
3389                         error = ENOMEM;
3390                         goto err_rx_desc;
3391                 }
3392                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3393                 bzero((void *)rxr->rx_base, rsize);
3394
3395                 /* Allocate receive buffers for the ring*/
3396                 if (igb_allocate_receive_buffers(rxr)) {
3397                         device_printf(dev,
3398                             "Critical Failure setting up receive buffers\n");
3399                         error = ENOMEM;
3400                         goto err_rx_desc;
3401                 }
3402         }
3403
3404         /*
3405         ** Finally set up the queue holding structs
3406         */
3407         for (int i = 0; i < adapter->num_queues; i++) {
3408                 que = &adapter->queues[i];
3409                 que->adapter = adapter;
3410                 que->txr = &adapter->tx_rings[i];
3411                 que->rxr = &adapter->rx_rings[i];
3412         }
3413
3414         return (0);
3415
3416 err_rx_desc:
3417         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3418                 igb_dma_free(adapter, &rxr->rxdma);
3419 err_tx_desc:
3420         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
3421                 igb_dma_free(adapter, &txr->txdma);
#ifndef IGB_LEGACY_TX
                if (txr->br != NULL)
                        buf_ring_free(txr->br, M_DEVBUF);
#endif
        }
3422         free(adapter->rx_rings, M_DEVBUF);
3423 rx_fail:
3424         free(adapter->tx_rings, M_DEVBUF);
3428 tx_fail:
3429         free(adapter->queues, M_DEVBUF);
3430 fail:
3431         return (error);
3432 }
3433
3434 /*********************************************************************
3435  *
3436  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3437  *  the information needed to transmit a packet on the wire. This is
3438  *  called only once at attach, setup is done every reset.
3439  *
3440  **********************************************************************/
3441 static int
3442 igb_allocate_transmit_buffers(struct tx_ring *txr)
3443 {
3444         struct adapter *adapter = txr->adapter;
3445         device_t dev = adapter->dev;
3446         struct igb_tx_buf *txbuf;
3447         int error, i;
3448
3449         /*
3450          * Setup DMA descriptor areas.
3451          */
3452         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3453                                1, 0,                    /* alignment, bounds */
3454                                BUS_SPACE_MAXADDR,       /* lowaddr */
3455                                BUS_SPACE_MAXADDR,       /* highaddr */
3456                                NULL, NULL,              /* filter, filterarg */
3457                                IGB_TSO_SIZE,            /* maxsize */
3458                                IGB_MAX_SCATTER,         /* nsegments */
3459                                PAGE_SIZE,               /* maxsegsize */
3460                                0,                       /* flags */
3461                                NULL,                    /* lockfunc */
3462                                NULL,                    /* lockfuncarg */
3463                                &txr->txtag))) {
3464                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3465                 goto fail;
3466         }
3467
3468         if (!(txr->tx_buffers =
3469             (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3470             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3471                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3472                 error = ENOMEM;
3473                 goto fail;
3474         }
3475
3476         /* Create the descriptor buffer dma maps */
3477         txbuf = txr->tx_buffers;
3478         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3479                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3480                 if (error != 0) {
3481                         device_printf(dev, "Unable to create TX DMA map\n");
3482                         goto fail;
3483                 }
3484         }
3485
3486         return 0;
3487 fail:
3488         /* Free everything; this also handles a partially set up ring */
3489         igb_free_transmit_structures(adapter);
3490         return (error);
3491 }
3492
3493 /*********************************************************************
3494  *
3495  *  Initialize a transmit ring.
3496  *
3497  **********************************************************************/
3498 static void
3499 igb_setup_transmit_ring(struct tx_ring *txr)
3500 {
3501         struct adapter *adapter = txr->adapter;
3502         struct igb_tx_buf *txbuf;
3503         int i;
3504 #ifdef DEV_NETMAP
3505         struct netmap_adapter *na = NA(adapter->ifp);
3506         struct netmap_slot *slot;
3507 #endif /* DEV_NETMAP */
3508
3509         /* Clear the old descriptor contents */
3510         IGB_TX_LOCK(txr);
3511 #ifdef DEV_NETMAP
3512         slot = netmap_reset(na, NR_TX, txr->me, 0);
3513 #endif /* DEV_NETMAP */
3514         bzero((void *)txr->tx_base,
3515               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3516         /* Reset indices */
3517         txr->next_avail_desc = 0;
3518         txr->next_to_clean = 0;
3519
3520         /* Free any existing tx buffers. */
3521         txbuf = txr->tx_buffers;
3522         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3523                 if (txbuf->m_head != NULL) {
3524                         bus_dmamap_sync(txr->txtag, txbuf->map,
3525                             BUS_DMASYNC_POSTWRITE);
3526                         bus_dmamap_unload(txr->txtag, txbuf->map);
3527                         m_freem(txbuf->m_head);
3528                         txbuf->m_head = NULL;
3529                 }
3530 #ifdef DEV_NETMAP
3531                 if (slot) {
3532                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3533                         /* no need to set the address */
3534                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3535                 }
3536 #endif /* DEV_NETMAP */
3537                 /* clear the watch index */
3538                 txbuf->eop = NULL;
3539         }
3540
3541         /* Set number of descriptors available */
3542         txr->tx_avail = adapter->num_tx_desc;
3543
3544         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3545             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3546         IGB_TX_UNLOCK(txr);
3547 }
3548
3549 /*********************************************************************
3550  *
3551  *  Initialize all transmit rings.
3552  *
3553  **********************************************************************/
3554 static void
3555 igb_setup_transmit_structures(struct adapter *adapter)
3556 {
3557         struct tx_ring *txr = adapter->tx_rings;
3558
3559         for (int i = 0; i < adapter->num_queues; i++, txr++)
3560                 igb_setup_transmit_ring(txr);
3561
3562         return;
3563 }
3564
3565 /*********************************************************************
3566  *
3567  *  Enable transmit unit.
3568  *
3569  **********************************************************************/
3570 static void
3571 igb_initialize_transmit_units(struct adapter *adapter)
3572 {
3573         struct tx_ring  *txr = adapter->tx_rings;
3574         struct e1000_hw *hw = &adapter->hw;
3575         u32             tctl, txdctl;
3576
3577         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3578         tctl = txdctl = 0;
3579
3580         /* Setup the Tx Descriptor Rings */
3581         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3582                 u64 bus_addr = txr->txdma.dma_paddr;
3583
3584                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3585                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3586                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3587                     (uint32_t)(bus_addr >> 32));
3588                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3589                     (uint32_t)bus_addr);
3590
3591                 /* Setup the HW Tx Head and Tail descriptor pointers */
3592                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3593                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3594
3595                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3596                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3597                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3598
3599                 txr->queue_status = IGB_QUEUE_IDLE;
3600
3601                 txdctl |= IGB_TX_PTHRESH;
3602                 txdctl |= IGB_TX_HTHRESH << 8;
3603                 txdctl |= IGB_TX_WTHRESH << 16;
3604                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3605                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3606         }
3607
3608         if (adapter->vf_ifp)
3609                 return;
3610
3611         e1000_config_collision_dist(hw);
3612
3613         /* Program the Transmit Control Register */
3614         tctl = E1000_READ_REG(hw, E1000_TCTL);
3615         tctl &= ~E1000_TCTL_CT;
3616         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3617                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3618
3619         /* This write will effectively turn on the transmit unit. */
3620         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3621 }
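
/*
 * Note on the TXDCTL value built above: the prefetch, host and
 * write-back thresholds occupy separate byte-aligned fields, which is
 * why IGB_TX_HTHRESH is shifted by 8 and IGB_TX_WTHRESH by 16 before
 * the queue-enable bit is OR'd in and the register is written.
 */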
3622
3623 /*********************************************************************
3624  *
3625  *  Free all transmit rings.
3626  *
3627  **********************************************************************/
3628 static void
3629 igb_free_transmit_structures(struct adapter *adapter)
3630 {
3631         struct tx_ring *txr = adapter->tx_rings;
3632
3633         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3634                 IGB_TX_LOCK(txr);
3635                 igb_free_transmit_buffers(txr);
3636                 igb_dma_free(adapter, &txr->txdma);
3637                 IGB_TX_UNLOCK(txr);
3638                 IGB_TX_LOCK_DESTROY(txr);
3639         }
3640         free(adapter->tx_rings, M_DEVBUF);
3641 }
3642
3643 /*********************************************************************
3644  *
3645  *  Free transmit ring related data structures.
3646  *
3647  **********************************************************************/
3648 static void
3649 igb_free_transmit_buffers(struct tx_ring *txr)
3650 {
3651         struct adapter *adapter = txr->adapter;
3652         struct igb_tx_buf *tx_buffer;
3653         int             i;
3654
3655         INIT_DEBUGOUT("free_transmit_ring: begin");
3656
3657         if (txr->tx_buffers == NULL)
3658                 return;
3659
3660         tx_buffer = txr->tx_buffers;
3661         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3662                 if (tx_buffer->m_head != NULL) {
3663                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3664                             BUS_DMASYNC_POSTWRITE);
3665                         bus_dmamap_unload(txr->txtag,
3666                             tx_buffer->map);
3667                         m_freem(tx_buffer->m_head);
3668                         tx_buffer->m_head = NULL;
3669                         if (tx_buffer->map != NULL) {
3670                                 bus_dmamap_destroy(txr->txtag,
3671                                     tx_buffer->map);
3672                                 tx_buffer->map = NULL;
3673                         }
3674                 } else if (tx_buffer->map != NULL) {
3675                         bus_dmamap_unload(txr->txtag,
3676                             tx_buffer->map);
3677                         bus_dmamap_destroy(txr->txtag,
3678                             tx_buffer->map);
3679                         tx_buffer->map = NULL;
3680                 }
3681         }
3682 #ifndef IGB_LEGACY_TX
3683         if (txr->br != NULL)
3684                 buf_ring_free(txr->br, M_DEVBUF);
3685 #endif
3686         if (txr->tx_buffers != NULL) {
3687                 free(txr->tx_buffers, M_DEVBUF);
3688                 txr->tx_buffers = NULL;
3689         }
3690         if (txr->txtag != NULL) {
3691                 bus_dma_tag_destroy(txr->txtag);
3692                 txr->txtag = NULL;
3693         }
3694         return;
3695 }
3696
3697 /**********************************************************************
3698  *
3699  *  Setup work for hardware segmentation offload (TSO) on
3700  *  adapters using advanced tx descriptors
3701  *
3702  **********************************************************************/
3703 static int
3704 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3705     u32 *cmd_type_len, u32 *olinfo_status)
3706 {
3707         struct adapter *adapter = txr->adapter;
3708         struct e1000_adv_tx_context_desc *TXD;
3709         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3710         u32 mss_l4len_idx = 0, paylen;
3711         u16 vtag = 0, eh_type;
3712         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3713         struct ether_vlan_header *eh;
3714 #ifdef INET6
3715         struct ip6_hdr *ip6;
3716 #endif
3717 #ifdef INET
3718         struct ip *ip;
3719 #endif
3720         struct tcphdr *th;
3721
3722
3723         /*
3724          * Determine where frame payload starts.
3725          * Jump over vlan headers if already present
3726          */
3727         eh = mtod(mp, struct ether_vlan_header *);
3728         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3729                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3730                 eh_type = eh->evl_proto;
3731         } else {
3732                 ehdrlen = ETHER_HDR_LEN;
3733                 eh_type = eh->evl_encap_proto;
3734         }
3735
3736         switch (ntohs(eh_type)) {
3737 #ifdef INET6
3738         case ETHERTYPE_IPV6:
3739                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3740                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3741                 if (ip6->ip6_nxt != IPPROTO_TCP)
3742                         return (ENXIO);
3743                 ip_hlen = sizeof(struct ip6_hdr);
3745                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3746                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3747                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3748                 break;
3749 #endif
3750 #ifdef INET
3751         case ETHERTYPE_IP:
3752                 ip = (struct ip *)(mp->m_data + ehdrlen);
3753                 if (ip->ip_p != IPPROTO_TCP)
3754                         return (ENXIO);
3755                 ip->ip_sum = 0;
3756                 ip_hlen = ip->ip_hl << 2;
3757                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3758                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3759                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3760                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3761                 /* Tell transmit desc to also do IPv4 checksum. */
3762                 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3763                 break;
3764 #endif
3765         default:
3766                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3767                     __func__, ntohs(eh_type));
3768                 break;
3769         }
3770
3771         ctxd = txr->next_avail_desc;
3772         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3773
3774         tcp_hlen = th->th_off << 2;
3775
3776         /* This is used in the transmit desc in encap */
3777         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3778
3779         /* VLAN MACLEN IPLEN */
3780         if (mp->m_flags & M_VLANTAG) {
3781                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3782                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3783         }
3784
3785         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3786         vlan_macip_lens |= ip_hlen;
3787         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3788
3789         /* ADV DTYPE TUCMD */
3790         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3791         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3792         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3793
3794         /* MSS L4LEN IDX */
3795         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3796         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3797         /* 82575 needs the queue index added */
3798         if (adapter->hw.mac.type == e1000_82575)
3799                 mss_l4len_idx |= txr->me << 4;
3800         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3801
3802         TXD->seqnum_seed = htole32(0);
3803
3804         if (++ctxd == txr->num_desc)
3805                 ctxd = 0;
3806
3807         txr->tx_avail--;
3808         txr->next_avail_desc = ctxd;
3809         *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3810         *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3811         *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3812         ++txr->tso_tx;
3813         return (0);
3814 }
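
/*
 * Worked example (illustrative numbers only): for a TSO'd TCP/IPv4
 * chain with no vlan header, ehdrlen = 14, ip_hlen = 20 and
 * tcp_hlen = 20, so a 64014-byte packet carries
 * paylen = 64014 - 54 = 63960 bytes; with tso_segsz = 1448 the
 * hardware then emits howmany(63960, 1448) = 45 wire segments.
 */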
3815
3816 /*********************************************************************
3817  *
3818  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3819  *
3820  **********************************************************************/
3821
3822 static int
3823 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3824     u32 *cmd_type_len, u32 *olinfo_status)
3825 {
3826         struct e1000_adv_tx_context_desc *TXD;
3827         struct adapter *adapter = txr->adapter;
3828         struct ether_vlan_header *eh;
3829         struct ip *ip;
3830         struct ip6_hdr *ip6;
3831         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3832         int     ehdrlen, ip_hlen = 0;
3833         u16     etype;
3834         u8      ipproto = 0;
3835         int     offload = TRUE;
3836         int     ctxd = txr->next_avail_desc;
3837         u16     vtag = 0;
3838
3839         /* First check if TSO is to be used */
3840         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3841                 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3842
3843         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3844                 offload = FALSE;
3845
3846         /* Indicate the whole packet as payload when not doing TSO */
3847         *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3848
3849         /* Now ready a context descriptor */
3850         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3851
3852         /*
3853         ** In advanced descriptors the vlan tag must 
3854         ** be placed into the context descriptor. Hence
3855         ** we need to make one even if not doing offloads.
3856         */
3857         if (mp->m_flags & M_VLANTAG) {
3858                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3859                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3860         } else if (offload == FALSE) /* ... no offload to do */
3861                 return (0);
3862
3863         /*
3864          * Determine where frame payload starts.
3865          * Jump over vlan headers if already present,
3866          * helpful for QinQ too.
3867          */
3868         eh = mtod(mp, struct ether_vlan_header *);
3869         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3870                 etype = ntohs(eh->evl_proto);
3871                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3872         } else {
3873                 etype = ntohs(eh->evl_encap_proto);
3874                 ehdrlen = ETHER_HDR_LEN;
3875         }
3876
3877         /* Set the ether header length */
3878         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3879
3880         switch (etype) {
3881                 case ETHERTYPE_IP:
3882                         ip = (struct ip *)(mp->m_data + ehdrlen);
3883                         ip_hlen = ip->ip_hl << 2;
3884                         ipproto = ip->ip_p;
3885                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3886                         break;
3887                 case ETHERTYPE_IPV6:
3888                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3889                         ip_hlen = sizeof(struct ip6_hdr);
3890                         /* XXX-BZ this will go badly in case of ext hdrs. */
3891                         ipproto = ip6->ip6_nxt;
3892                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3893                         break;
3894                 default:
3895                         offload = FALSE;
3896                         break;
3897         }
3898
3899         vlan_macip_lens |= ip_hlen;
3900         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3901
3902         switch (ipproto) {
3903                 case IPPROTO_TCP:
3904                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3905                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3906                         break;
3907                 case IPPROTO_UDP:
3908                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3909                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3910                         break;
3911
3912 #if __FreeBSD_version >= 800000
3913                 case IPPROTO_SCTP:
3914                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3915                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3916                         break;
3917 #endif
3918                 default:
3919                         offload = FALSE;
3920                         break;
3921         }
3922
3923         if (offload) /* For the TX descriptor setup */
3924                 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3925
3926         /* 82575 needs the queue index added */
3927         if (adapter->hw.mac.type == e1000_82575)
3928                 mss_l4len_idx = txr->me << 4;
3929
3930         /* Now copy bits into descriptor */
3931         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3932         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3933         TXD->seqnum_seed = htole32(0);
3934         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3935
3936         /* We've consumed the first desc, adjust counters */
3937         if (++ctxd == txr->num_desc)
3938                 ctxd = 0;
3939         txr->next_avail_desc = ctxd;
3940         --txr->tx_avail;
3941
3942         return (0);
3943 }
3944
3945 /**********************************************************************
3946  *
3947  *  Examine each tx_buffer in the used queue. If the hardware is done
3948  *  processing the packet then free associated resources. The
3949  *  tx_buffer is put back on the free queue.
3950  *
3951  *  TRUE return means there's work in the ring to clean; FALSE means it's empty.
3952  **********************************************************************/
3953 static bool
3954 igb_txeof(struct tx_ring *txr)
3955 {
3956         struct adapter          *adapter = txr->adapter;
3957         struct ifnet            *ifp = adapter->ifp;
3958         u32                     work, processed = 0;
3959         u16                     limit = txr->process_limit;
3960         struct igb_tx_buf       *buf;
3961         union e1000_adv_tx_desc *txd;
3962
3963         mtx_assert(&txr->tx_mtx, MA_OWNED);
3964
3965 #ifdef DEV_NETMAP
3966         if (netmap_tx_irq(ifp, txr->me))
3967                 return (FALSE);
3968 #endif /* DEV_NETMAP */
3969
3970         if (txr->tx_avail == txr->num_desc) {
3971                 txr->queue_status = IGB_QUEUE_IDLE;
3972                 return FALSE;
3973         }
3974
3975         /* Get work starting point */
3976         work = txr->next_to_clean;
3977         buf = &txr->tx_buffers[work];
3978         txd = &txr->tx_base[work];
3979         work -= txr->num_desc; /* The distance to ring end */
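        /*
         * Note: "work" is biased so it hits zero exactly at the ring
         * wrap point; the !work tests below re-apply the bias on wrap,
         * and adding num_desc back after the loop recovers the real
         * next_to_clean index.
         */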
3980         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3981             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3982         do {
3983                 union e1000_adv_tx_desc *eop = buf->eop;
3984                 if (eop == NULL) /* No work */
3985                         break;
3986
3987                 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3988                         break;  /* I/O not complete */
3989
3990                 if (buf->m_head) {
3991                         txr->bytes +=
3992                             buf->m_head->m_pkthdr.len;
3993                         bus_dmamap_sync(txr->txtag,
3994                             buf->map,
3995                             BUS_DMASYNC_POSTWRITE);
3996                         bus_dmamap_unload(txr->txtag,
3997                             buf->map);
3998                         m_freem(buf->m_head);
3999                         buf->m_head = NULL;
4000                 }
4001                 buf->eop = NULL;
4002                 ++txr->tx_avail;
4003
4004                 /* We clean the range if multi segment */
4005                 while (txd != eop) {
4006                         ++txd;
4007                         ++buf;
4008                         ++work;
4009                         /* wrap the ring? */
4010                         if (__predict_false(!work)) {
4011                                 work -= txr->num_desc;
4012                                 buf = txr->tx_buffers;
4013                                 txd = txr->tx_base;
4014                         }
4015                         if (buf->m_head) {
4016                                 txr->bytes +=
4017                                     buf->m_head->m_pkthdr.len;
4018                                 bus_dmamap_sync(txr->txtag,
4019                                     buf->map,
4020                                     BUS_DMASYNC_POSTWRITE);
4021                                 bus_dmamap_unload(txr->txtag,
4022                                     buf->map);
4023                                 m_freem(buf->m_head);
4024                                 buf->m_head = NULL;
4025                         }
4026                         ++txr->tx_avail;
4027                         buf->eop = NULL;
4028
4029                 }
4030                 ++txr->packets;
4031                 ++processed;
4032                 ++ifp->if_opackets;
4033                 txr->watchdog_time = ticks;
4034
4035                 /* Try the next packet */
4036                 ++txd;
4037                 ++buf;
4038                 ++work;
4039                 /* reset with a wrap */
4040                 if (__predict_false(!work)) {
4041                         work -= txr->num_desc;
4042                         buf = txr->tx_buffers;
4043                         txd = txr->tx_base;
4044                 }
4045                 prefetch(txd);
4046         } while (__predict_true(--limit));
4047
4048         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4049             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4050
4051         work += txr->num_desc;
4052         txr->next_to_clean = work;
4053
4054         /*
4055         ** Watchdog calculation: we know there's work
4056         ** outstanding, or the first return would have
4057         ** been taken, so nothing processed for too
4058         ** long indicates a hang.
4059         */
4060         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4061                 txr->queue_status |= IGB_QUEUE_HUNG;
4062
4063         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4064                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;       
4065
4066         if (txr->tx_avail == txr->num_desc) {
4067                 txr->queue_status = IGB_QUEUE_IDLE;
4068                 return (FALSE);
4069         }
4070
4071         return (TRUE);
4072 }
4073
4074 /*********************************************************************
4075  *
4076  *  Refresh mbuf buffers for RX descriptor rings
4077  *  Refresh mbuf buffers for RX descriptor rings
4078  *   - now keeps its own state, so discards due to resource
4079  *     exhaustion are unnecessary: if an mbuf cannot be obtained
4080  *     the routine just returns, keeping its placeholder, and can
4081  *     simply be called again later to retry.
4082  **********************************************************************/
4083 static void
4084 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4085 {
4086         struct adapter          *adapter = rxr->adapter;
4087         bus_dma_segment_t       hseg[1];
4088         bus_dma_segment_t       pseg[1];
4089         struct igb_rx_buf       *rxbuf;
4090         struct mbuf             *mh, *mp;
4091         int                     i, j, nsegs, error;
4092         bool                    refreshed = FALSE;
4093
4094         i = j = rxr->next_to_refresh;
4095         /*
4096         ** Get one descriptor beyond
4097         ** our work mark to control
4098         ** the loop.
4099         */
4100         if (++j == adapter->num_rx_desc)
4101                 j = 0;
4102
4103         while (j != limit) {
4104                 rxbuf = &rxr->rx_buffers[i];
4105                 /* No hdr mbuf used with header split off */
4106                 if (rxr->hdr_split == FALSE)
4107                         goto no_split;
4108                 if (rxbuf->m_head == NULL) {
4109                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4110                         if (mh == NULL)
4111                                 goto update;
4112                 } else
4113                         mh = rxbuf->m_head;
4114
4115                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4117                 mh->m_flags |= M_PKTHDR;
4118                 /* Get the memory mapping */
4119                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4120                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4121                 if (error != 0) {
4122                         device_printf(adapter->dev, "Refresh mbufs: hdr"
4123                             " dmamap load failure - %d\n", error);
4124                         m_free(mh);
4125                         rxbuf->m_head = NULL;
4126                         goto update;
4127                 }
4128                 rxbuf->m_head = mh;
4129                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4130                     BUS_DMASYNC_PREREAD);
4131                 rxr->rx_base[i].read.hdr_addr =
4132                     htole64(hseg[0].ds_addr);
4133 no_split:
4134                 if (rxbuf->m_pack == NULL) {
4135                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4136                             M_PKTHDR, adapter->rx_mbuf_sz);
4137                         if (mp == NULL)
4138                                 goto update;
4139                 } else
4140                         mp = rxbuf->m_pack;
4141
4142                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4143                 /* Get the memory mapping */
4144                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4145                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4146                 if (error != 0) {
4147                         device_printf(adapter->dev, "Refresh mbufs: payload"
4148                             " dmamap load failure - %d\n", error);
4149                         m_free(mp);
4150                         rxbuf->m_pack = NULL;
4151                         goto update;
4152                 }
4153                 rxbuf->m_pack = mp;
4154                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4155                     BUS_DMASYNC_PREREAD);
4156                 rxr->rx_base[i].read.pkt_addr =
4157                     htole64(pseg[0].ds_addr);
4158                 refreshed = TRUE; /* I feel wefreshed :) */
4159
4160                 i = j; /* our next is precalculated */
4161                 rxr->next_to_refresh = i;
4162                 if (++j == adapter->num_rx_desc)
4163                         j = 0;
4164         }
4165 update:
4166         if (refreshed) /* update tail */
4167                 E1000_WRITE_REG(&adapter->hw,
4168                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4169         return;
4170 }
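
/*
 * Note: the E1000_RDT() write above hands the descriptors refreshed so
 * far back to the hardware; any slot the loop could not refill keeps
 * its placeholder and is simply retried on the next call.
 */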
4171
4172
4173 /*********************************************************************
4174  *
4175  *  Allocate memory for rx_buffer structures. Since we use one
4176  *  rx_buffer per received packet, the maximum number of rx_buffer's
4177  *  that we'll need is equal to the number of receive descriptors
4178  *  that we've allocated.
4179  *
4180  **********************************************************************/
4181 static int
4182 igb_allocate_receive_buffers(struct rx_ring *rxr)
4183 {
4184         struct  adapter         *adapter = rxr->adapter;
4185         device_t                dev = adapter->dev;
4186         struct igb_rx_buf       *rxbuf;
4187         int                     i, bsize, error;
4188
4189         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4190         if (!(rxr->rx_buffers =
4191             (struct igb_rx_buf *) malloc(bsize,
4192             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4193                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4194                 error = ENOMEM;
4195                 goto fail;
4196         }
4197
4198         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4199                                    1, 0,                /* alignment, bounds */
4200                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4201                                    BUS_SPACE_MAXADDR,   /* highaddr */
4202                                    NULL, NULL,          /* filter, filterarg */
4203                                    MSIZE,               /* maxsize */
4204                                    1,                   /* nsegments */
4205                                    MSIZE,               /* maxsegsize */
4206                                    0,                   /* flags */
4207                                    NULL,                /* lockfunc */
4208                                    NULL,                /* lockfuncarg */
4209                                    &rxr->htag))) {
4210                 device_printf(dev, "Unable to create RX DMA tag\n");
4211                 goto fail;
4212         }
4213
4214         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4215                                    1, 0,                /* alignment, bounds */
4216                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4217                                    BUS_SPACE_MAXADDR,   /* highaddr */
4218                                    NULL, NULL,          /* filter, filterarg */
4219                                    MJUM9BYTES,          /* maxsize */
4220                                    1,                   /* nsegments */
4221                                    MJUM9BYTES,          /* maxsegsize */
4222                                    0,                   /* flags */
4223                                    NULL,                /* lockfunc */
4224                                    NULL,                /* lockfuncarg */
4225                                    &rxr->ptag))) {
4226                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4227                 goto fail;
4228         }
4229
4230         for (i = 0; i < adapter->num_rx_desc; i++) {
4231                 rxbuf = &rxr->rx_buffers[i];
4232                 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4233                 if (error) {
4234                         device_printf(dev,
4235                             "Unable to create RX head DMA maps\n");
4236                         goto fail;
4237                 }
4238                 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4239                 if (error) {
4240                         device_printf(dev,
4241                             "Unable to create RX packet DMA maps\n");
4242                         goto fail;
4243                 }
4244         }
4245
4246         return (0);
4247
4248 fail:
4249         /* Frees all, but can handle partial completion */
4250         igb_free_receive_structures(adapter);
4251         return (error);
4252 }
4253
4254
4255 static void
4256 igb_free_receive_ring(struct rx_ring *rxr)
4257 {
4258         struct  adapter         *adapter = rxr->adapter;
4259         struct igb_rx_buf       *rxbuf;
4260
4261
4262         for (int i = 0; i < adapter->num_rx_desc; i++) {
4263                 rxbuf = &rxr->rx_buffers[i];
4264                 if (rxbuf->m_head != NULL) {
4265                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4266                             BUS_DMASYNC_POSTREAD);
4267                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4268                         rxbuf->m_head->m_flags |= M_PKTHDR;
4269                         m_freem(rxbuf->m_head);
4270                 }
4271                 if (rxbuf->m_pack != NULL) {
4272                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4273                             BUS_DMASYNC_POSTREAD);
4274                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4275                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4276                         m_freem(rxbuf->m_pack);
4277                 }
4278                 rxbuf->m_head = NULL;
4279                 rxbuf->m_pack = NULL;
4280         }
4281 }
4282
4283
4284 /*********************************************************************
4285  *
4286  *  Initialize a receive ring and its buffers.
4287  *
4288  **********************************************************************/
4289 static int
4290 igb_setup_receive_ring(struct rx_ring *rxr)
4291 {
4292         struct  adapter         *adapter;
4293         struct  ifnet           *ifp;
4294         device_t                dev;
4295         struct igb_rx_buf       *rxbuf;
4296         bus_dma_segment_t       pseg[1], hseg[1];
4297         struct lro_ctrl         *lro = &rxr->lro;
4298         int                     rsize, nsegs, error = 0;
4299 #ifdef DEV_NETMAP
4300         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4301         struct netmap_slot *slot;
4302 #endif /* DEV_NETMAP */
4303
4304         adapter = rxr->adapter;
4305         dev = adapter->dev;
4306         ifp = adapter->ifp;
4307
4308         /* Clear the ring contents */
4309         IGB_RX_LOCK(rxr);
4310 #ifdef DEV_NETMAP
4311         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4312 #endif /* DEV_NETMAP */
4313         rsize = roundup2(adapter->num_rx_desc *
4314             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4315         bzero((void *)rxr->rx_base, rsize);
4316
4317         /*
4318         ** Free current RX buffer structures and their mbufs
4319         */
4320         igb_free_receive_ring(rxr);
4321
4322         /* Configure for header split? */
4323         if (igb_header_split)
4324                 rxr->hdr_split = TRUE;
4325
4326         /* Now replenish the ring mbufs */
4327         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4328                 struct mbuf     *mh, *mp;
4329
4330                 rxbuf = &rxr->rx_buffers[j];
4331 #ifdef DEV_NETMAP
4332                 if (slot) {
4333                         /* slot sj corresponds to the j-th NIC-ring entry */
4334                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4335                         uint64_t paddr;
4336                         void *addr;
4337
4338                         addr = PNMB(na, slot + sj, &paddr);
4339                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4340                         /* Update descriptor */
4341                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4342                         continue;
4343                 }
4344 #endif /* DEV_NETMAP */
4345                 if (rxr->hdr_split == FALSE)
4346                         goto skip_head;
4347
4348                 /* First the header */
4349                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4350                 if (rxbuf->m_head == NULL) {
4351                         error = ENOBUFS;
4352                         goto fail;
4353                 }
4354                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4355                 mh = rxbuf->m_head;
4356                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4357                 mh->m_flags |= M_PKTHDR;
4358                 /* Get the memory mapping */
4359                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4360                     rxbuf->hmap, rxbuf->m_head, hseg,
4361                     &nsegs, BUS_DMA_NOWAIT);
4362                 if (error != 0) /* Nothing elegant to do here */
4363                         goto fail;
4364                 bus_dmamap_sync(rxr->htag,
4365                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4366                 /* Update descriptor */
4367                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4368
4369 skip_head:
4370                 /* Now the payload cluster */
4371                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4372                     M_PKTHDR, adapter->rx_mbuf_sz);
4373                 if (rxbuf->m_pack == NULL) {
4374                         error = ENOBUFS;
4375                         goto fail;
4376                 }
4377                 mp = rxbuf->m_pack;
4378                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4379                 /* Get the memory mapping */
4380                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4381                     rxbuf->pmap, mp, pseg,
4382                     &nsegs, BUS_DMA_NOWAIT);
4383                 if (error != 0)
4384                         goto fail;
4385                 bus_dmamap_sync(rxr->ptag,
4386                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4387                 /* Update descriptor */
4388                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4389         }
4390
4391         /* Setup our descriptor indices */
4392         rxr->next_to_check = 0;
4393         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4394         rxr->lro_enabled = FALSE;
4395         rxr->rx_split_packets = 0;
4396         rxr->rx_bytes = 0;
4397
4398         rxr->fmp = NULL;
4399         rxr->lmp = NULL;
4400
4401         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4402             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4403
4404         /*
4405         ** Now set up the LRO interface; we
4406         ** also only do header split when LRO
4407         ** is enabled, since it is so often
4408         ** undesirable otherwise.
4409         */
4410         if (ifp->if_capenable & IFCAP_LRO) {
4411                 error = tcp_lro_init(lro);
4412                 if (error) {
4413                         device_printf(dev, "LRO Initialization failed!\n");
4414                         goto fail;
4415                 }
4416                 INIT_DEBUGOUT("RX LRO Initialized\n");
4417                 rxr->lro_enabled = TRUE;
4418                 lro->ifp = adapter->ifp;
4419         }
4420
4421         IGB_RX_UNLOCK(rxr);
4422         return (0);
4423
4424 fail:
4425         igb_free_receive_ring(rxr);
4426         IGB_RX_UNLOCK(rxr);
4427         return (error);
4428 }
4429
4430
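/*
 * Illustrative sketch (hypothetical helper, not used by the driver):
 * in the read format of an advanced RX descriptor the hardware wants
 * two little-endian DMA addresses, one for the header buffer and one
 * for the payload buffer, which is what igb_setup_receive_ring()
 * above writes; with header split off only pkt_addr is used.
 */
static inline void
igb_example_write_rxd(union e1000_adv_rx_desc *rxd,
    bus_addr_t hdr_paddr, bus_addr_t pkt_paddr)
{
	rxd->read.hdr_addr = htole64(hdr_paddr);  /* header buffer */
	rxd->read.pkt_addr = htole64(pkt_paddr);  /* payload buffer */
}
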
4431 /*********************************************************************
4432  *
4433  *  Initialize all receive rings.
4434  *
4435  **********************************************************************/
4436 static int
4437 igb_setup_receive_structures(struct adapter *adapter)
4438 {
4439         struct rx_ring *rxr = adapter->rx_rings;
4440         int i;
4441
4442         for (i = 0; i < adapter->num_queues; i++, rxr++)
4443                 if (igb_setup_receive_ring(rxr))
4444                         goto fail;
4445
4446         return (0);
4447 fail:
4448         /*
4449          * Free the RX buffers allocated so far; we only handle
4450          * the rings that completed, since the failing ring has
4451          * already cleaned up after itself. 'i' is the endpoint.
4452          */
4453         for (int j = 0; j < i; ++j) {
4454                 rxr = &adapter->rx_rings[j];
4455                 IGB_RX_LOCK(rxr);
4456                 igb_free_receive_ring(rxr);
4457                 IGB_RX_UNLOCK(rxr);
4458         }
4459
4460         return (ENOBUFS);
4461 }
4462
4463 /*********************************************************************
4464  *
4465  *  Enable receive unit.
4466  *
4467  **********************************************************************/
4468 static void
4469 igb_initialize_receive_units(struct adapter *adapter)
4470 {
4471         struct rx_ring  *rxr = adapter->rx_rings;
4472         struct ifnet    *ifp = adapter->ifp;
4473         struct e1000_hw *hw = &adapter->hw;
4474         u32             rctl, rxcsum, psize, srrctl = 0;
4475
4476         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4477
4478         /*
4479          * Make sure receives are disabled while setting
4480          * up the descriptor ring
4481          */
4482         rctl = E1000_READ_REG(hw, E1000_RCTL);
4483         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4484
4485         /*
4486         ** Set up for header split
4487         */
4488         if (igb_header_split) {
4489                 /* Use a standard mbuf for the header */
4490                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4491                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4492         } else
4493                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4494
4495         /*
4496         ** Set up for jumbo frames
4497         */
4498         if (ifp->if_mtu > ETHERMTU) {
4499                 rctl |= E1000_RCTL_LPE;
4500                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4501                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4502                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4503                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4504                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4505                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4506                 }
4507                 /* Set maximum packet len */
4508                 psize = adapter->max_frame_size;
4509                 /* are we on a vlan? */
4510                 if (adapter->ifp->if_vlantrunk != NULL)
4511                         psize += VLAN_TAG_SIZE;
4512                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4513         } else {
4514                 rctl &= ~E1000_RCTL_LPE;
4515                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4516                 rctl |= E1000_RCTL_SZ_2048;
4517         }
4518
4519         /*
4520          * If TX flow control is disabled and there's >1 queue defined,
4521          * enable DROP.
4522          *
4523          * This drops frames rather than hanging the RX MAC for all queues.
4524          */
4525         if ((adapter->num_queues > 1) &&
4526             (adapter->fc == e1000_fc_none ||
4527              adapter->fc == e1000_fc_rx_pause)) {
4528                 srrctl |= E1000_SRRCTL_DROP_EN;
4529         }
4530
4531         /* Setup the Base and Length of the Rx Descriptor Rings */
4532         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4533                 u64 bus_addr = rxr->rxdma.dma_paddr;
4534                 u32 rxdctl;
4535
4536                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4537                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4538                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4539                     (uint32_t)(bus_addr >> 32));
4540                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4541                     (uint32_t)bus_addr);
4542                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4543                 /* Enable this Queue */
4544                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4545                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4546                 rxdctl &= 0xFFF00000;
4547                 rxdctl |= IGB_RX_PTHRESH;
4548                 rxdctl |= IGB_RX_HTHRESH << 8;
4549                 rxdctl |= IGB_RX_WTHRESH << 16;
4550                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4551         }
4552
4553         /*
4554         ** Setup for RX MultiQueue
4555         */
4556         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4557         if (adapter->num_queues > 1) {
4558                 u32 random[10], mrqc, shift = 0;
4559                 union igb_reta {
4560                         u32 dword;
4561                         u8  bytes[4];
4562                 } reta;
4563
4564                 arc4rand(&random, sizeof(random), 0);
4565                 if (adapter->hw.mac.type == e1000_82575)
4566                         shift = 6;
4567                 /* Populate the RSS redirection table (RETA) */
4568                 for (int i = 0; i < 128; i++) {
4569                         reta.bytes[i & 3] =
4570                             (i % adapter->num_queues) << shift;
4571                         if ((i & 3) == 3)
4572                                 E1000_WRITE_REG(hw,
4573                                     E1000_RETA(i >> 2), reta.dword);
4574                 }
4575                 /* Now fill in hash table */
4576                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4577                 for (int i = 0; i < 10; i++)
4578                         E1000_WRITE_REG_ARRAY(hw,
4579                             E1000_RSSRK(0), i, random[i]);
4580
4581                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4582                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4583                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4584                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4585                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4586                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4587                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4588                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4589
4590                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4591
4592                 /*
4593                 ** NOTE: Receive Full-Packet Checksum Offload
4594                 ** is mutually exclusive with Multiqueue. However
4595                 ** this is distinct from the TCP/IP header
4596                 ** checksums, which still work.
4597                 */
4598                 rxcsum |= E1000_RXCSUM_PCSD;
4599 #if __FreeBSD_version >= 800000
4600                 /* For SCTP Offload */
4601                 if ((hw->mac.type == e1000_82576)
4602                     && (ifp->if_capenable & IFCAP_RXCSUM))
4603                         rxcsum |= E1000_RXCSUM_CRCOFL;
4604 #endif
4605         } else {
4606                 /* Non RSS setup */
4607                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4608                         rxcsum |= E1000_RXCSUM_IPPCSE;
4609 #if __FreeBSD_version >= 800000
4610                         if (adapter->hw.mac.type == e1000_82576)
4611                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4612 #endif
4613                 } else
4614                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4615         }
4616         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4617
4618         /* Setup the Receive Control Register */
4619         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4620         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4621                    E1000_RCTL_RDMTS_HALF |
4622                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4623         /* Strip CRC bytes. */
4624         rctl |= E1000_RCTL_SECRC;
4625         /* Make sure VLAN Filters are off */
4626         rctl &= ~E1000_RCTL_VFE;
4627         /* Don't store bad packets */
4628         rctl &= ~E1000_RCTL_SBP;
4629
4630         /* Enable Receives */
4631         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4632
4633         /*
4634          * Setup the HW Rx Head and Tail Descriptor Pointers
4635          *   - needs to be after enable
4636          */
4637         for (int i = 0; i < adapter->num_queues; i++) {
4638                 rxr = &adapter->rx_rings[i];
4639                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4640 #ifdef DEV_NETMAP
4641                 /*
4642                  * An init() while a netmap client is active must
4643                  * preserve the rx buffers passed to userspace.
4644                  * In this driver it means we adjust RDT to
4645                  * something different from next_to_refresh
4646                  * (which is not used in netmap mode).
4647                  */
4648                 if (ifp->if_capenable & IFCAP_NETMAP) {
4649                         struct netmap_adapter *na = NA(adapter->ifp);
4650                         struct netmap_kring *kring = &na->rx_rings[i];
4651                         int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4652
4653                         if (t >= adapter->num_rx_desc)
4654                                 t -= adapter->num_rx_desc;
4655                         else if (t < 0)
4656                                 t += adapter->num_rx_desc;
4657                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4658                 } else
4659 #endif /* DEV_NETMAP */
4660                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4661         }
4662         return;
4663 }
4664
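/*
 * Illustrative sketch of the RETA fill in
 * igb_initialize_receive_units() above: 128 one-byte redirection
 * entries are packed four at a time into 32 dword registers, each
 * entry holding (i % num_queues), shifted left by 6 on 82575.  A
 * hypothetical stand-alone version of the packing:
 */
static inline u32
igb_example_reta_dword(int base, int num_queues, int shift)
{
	union { u32 dword; u8 bytes[4]; } reta;

	for (int n = 0; n < 4; n++)
		reta.bytes[n] = ((base + n) % num_queues) << shift;
	return (reta.dword);	/* destined for E1000_RETA(base >> 2) */
}
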
4665 /*********************************************************************
4666  *
4667  *  Free receive rings.
4668  *
4669  **********************************************************************/
4670 static void
4671 igb_free_receive_structures(struct adapter *adapter)
4672 {
4673         struct rx_ring *rxr = adapter->rx_rings;
4674
4675         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4676                 struct lro_ctrl *lro = &rxr->lro;
4677                 igb_free_receive_buffers(rxr);
4678                 tcp_lro_free(lro);
4679                 igb_dma_free(adapter, &rxr->rxdma);
4680         }
4681
4682         free(adapter->rx_rings, M_DEVBUF);
4683 }
4684
4685 /*********************************************************************
4686  *
4687  *  Free receive ring data structures.
4688  *
4689  **********************************************************************/
4690 static void
4691 igb_free_receive_buffers(struct rx_ring *rxr)
4692 {
4693         struct adapter          *adapter = rxr->adapter;
4694         struct igb_rx_buf       *rxbuf;
4695         int i;
4696
4697         INIT_DEBUGOUT("free_receive_structures: begin");
4698
4699         /* Cleanup any existing buffers */
4700         if (rxr->rx_buffers != NULL) {
4701                 for (i = 0; i < adapter->num_rx_desc; i++) {
4702                         rxbuf = &rxr->rx_buffers[i];
4703                         if (rxbuf->m_head != NULL) {
4704                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4705                                     BUS_DMASYNC_POSTREAD);
4706                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4707                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4708                                 m_freem(rxbuf->m_head);
4709                         }
4710                         if (rxbuf->m_pack != NULL) {
4711                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4712                                     BUS_DMASYNC_POSTREAD);
4713                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4714                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4715                                 m_freem(rxbuf->m_pack);
4716                         }
4717                         rxbuf->m_head = NULL;
4718                         rxbuf->m_pack = NULL;
4719                         if (rxbuf->hmap != NULL) {
4720                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4721                                 rxbuf->hmap = NULL;
4722                         }
4723                         if (rxbuf->pmap != NULL) {
4724                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4725                                 rxbuf->pmap = NULL;
4726                         }
4727                 }
4728                 if (rxr->rx_buffers != NULL) {
4729                         free(rxr->rx_buffers, M_DEVBUF);
4730                         rxr->rx_buffers = NULL;
4731                 }
4732         }
4733
4734         if (rxr->htag != NULL) {
4735                 bus_dma_tag_destroy(rxr->htag);
4736                 rxr->htag = NULL;
4737         }
4738         if (rxr->ptag != NULL) {
4739                 bus_dma_tag_destroy(rxr->ptag);
4740                 rxr->ptag = NULL;
4741         }
4742 }
4743
4744 static __inline void
4745 igb_rx_discard(struct rx_ring *rxr, int i)
4746 {
4747         struct igb_rx_buf       *rbuf;
4748
4749         rbuf = &rxr->rx_buffers[i];
4750
4751         /* Partially received? Free the chain */
4752         if (rxr->fmp != NULL) {
4753                 rxr->fmp->m_flags |= M_PKTHDR;
4754                 m_freem(rxr->fmp);
4755                 rxr->fmp = NULL;
4756                 rxr->lmp = NULL;
4757         }
4758
4759         /*
4760         ** With advanced descriptors the writeback
4761         ** clobbers the buffer addresses, so it's easier
4762         ** to just free the existing mbufs and take
4763         ** the normal refresh path to get new buffers
4764         ** and mappings.
4765         */
4766         if (rbuf->m_head) {
4767                 m_free(rbuf->m_head);
4768                 rbuf->m_head = NULL;
4769                 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4770         }
4771
4772         if (rbuf->m_pack) {
4773                 m_free(rbuf->m_pack);
4774                 rbuf->m_pack = NULL;
4775                 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4776         }
4777
4778         return;
4779 }
4780
4781 static __inline void
4782 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4783 {
4784
4785         /*
4786          * At the moment LRO is done only for IPv4/TCP packets whose
4787          * TCP checksum has been verified by the hardware, and which
4788          * carry no VLAN tag in the Ethernet header.
4789          */
4790         if (rxr->lro_enabled &&
4791             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4792             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4793             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4794             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4795             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4796             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4797                 /*
4798                  * Send to the stack if:
4799                  *  - LRO not enabled, or
4800                  *  - no LRO resources, or
4801                  *  - the LRO enqueue fails
4802                  */
4803                 if (rxr->lro.lro_cnt != 0)
4804                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4805                                 return;
4806         }
4807         IGB_RX_UNLOCK(rxr);
4808         (*ifp->if_input)(ifp, m);
4809         IGB_RX_LOCK(rxr);
4810 }
4811
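/*
 * Illustrative predicate distilled from igb_rx_input() above (a
 * hypothetical helper, not called anywhere): a frame is eligible for
 * LRO only when it is IPv4/TCP, not matched by an ETQF filter, and
 * its TCP checksum has already been verified by the hardware.
 */
static inline bool
igb_example_lro_eligible(u32 ptype, u64 csum_flags)
{
	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) != 0)
		return (FALSE);
	if ((ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP))
	    != (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP))
		return (FALSE);
	return ((csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR));
}
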
4812 /*********************************************************************
4813  *
4814  *  This routine executes in interrupt context. It replenishes
4815  *  the mbufs in the descriptor ring and passes data which has
4816  *  been DMA'd into host memory up to the upper layer.
4817  *
4818  *  We loop at most count times if count is > 0, or until done if
4819  *  count < 0.
4820  *
4821  *  Return TRUE if more to clean, FALSE otherwise
4822  *********************************************************************/
4823 static bool
4824 igb_rxeof(struct igb_queue *que, int count, int *done)
4825 {
4826         struct adapter          *adapter = que->adapter;
4827         struct rx_ring          *rxr = que->rxr;
4828         struct ifnet            *ifp = adapter->ifp;
4829         struct lro_ctrl         *lro = &rxr->lro;
4830         struct lro_entry        *queued;
4831         int                     i, processed = 0, rxdone = 0;
4832         u32                     ptype, staterr = 0;
4833         union e1000_adv_rx_desc *cur;
4834
4835         IGB_RX_LOCK(rxr);
4836         /* Sync the ring. */
4837         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4838             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4839
4840 #ifdef DEV_NETMAP
4841         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4842                 IGB_RX_UNLOCK(rxr);
4843                 return (FALSE);
4844         }
4845 #endif /* DEV_NETMAP */
4846
4847         /* Main clean loop */
4848         for (i = rxr->next_to_check; count != 0;) {
4849                 struct mbuf             *sendmp, *mh, *mp;
4850                 struct igb_rx_buf       *rxbuf;
4851                 u16                     hlen, plen, hdr, vtag;
4852                 bool                    eop = FALSE;
4853  
4854                 cur = &rxr->rx_base[i];
4855                 staterr = le32toh(cur->wb.upper.status_error);
4856                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4857                         break;
4858                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4859                         break;
4860                 count--;
4861                 sendmp = mh = mp = NULL;
4862                 cur->wb.upper.status_error = 0;
4863                 rxbuf = &rxr->rx_buffers[i];
4864                 plen = le16toh(cur->wb.upper.length);
4865                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4866                 if (((adapter->hw.mac.type == e1000_i350) ||
4867                     (adapter->hw.mac.type == e1000_i354)) &&
4868                     (staterr & E1000_RXDEXT_STATERR_LB))
4869                         vtag = be16toh(cur->wb.upper.vlan);
4870                 else
4871                         vtag = le16toh(cur->wb.upper.vlan);
4872                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4873                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4874
4875                 /*
4876                  * Free the frame (all segments) if we're at EOP and
4877                  * it's an error.
4878                  *
4879                  * The datasheet states that EOP + status is only valid for
4880                  * the final segment in a multi-segment frame.
4881                  */
4882                 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4883                         adapter->dropped_pkts++;
4884                         ++rxr->rx_discarded;
4885                         igb_rx_discard(rxr, i);
4886                         goto next_desc;
4887                 }
4888
4889                 /*
4890                 ** The way the hardware is configured to
4891                 ** split, it will ONLY use the header buffer
4892                 ** when header split is enabled, otherwise we
4893                 ** get normal behavior, ie, both header and
4894                 ** payload are DMA'd into the payload buffer.
4895                 **
4896                 ** The fmp test is to catch the case where a
4897                 ** packet spans multiple descriptors, in that
4898                 ** case only the first header is valid.
4899                 */
4900                 if (rxr->hdr_split && rxr->fmp == NULL) {
4901                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4902                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4903                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4904                         if (hlen > IGB_HDR_BUF)
4905                                 hlen = IGB_HDR_BUF;
4906                         mh = rxr->rx_buffers[i].m_head;
4907                         mh->m_len = hlen;
4908                         /* clear buf pointer for refresh */
4909                         rxbuf->m_head = NULL;
4910                         /*
4911                         ** Get the payload length, this
4912                         ** could be zero if its a small
4913                         ** packet.
4914                         */
4915                         if (plen > 0) {
4916                                 mp = rxr->rx_buffers[i].m_pack;
4917                                 mp->m_len = plen;
4918                                 mh->m_next = mp;
4919                                 /* clear buf pointer */
4920                                 rxbuf->m_pack = NULL;
4921                                 rxr->rx_split_packets++;
4922                         }
4923                 } else {
4924                         /*
4925                         ** Either no header split, or a
4926                         ** secondary piece of a fragmented
4927                         ** split packet.
4928                         */
4929                         mh = rxr->rx_buffers[i].m_pack;
4930                         mh->m_len = plen;
4931                         /* clear buf info for refresh */
4932                         rxbuf->m_pack = NULL;
4933                 }
4934                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4935
4936                 ++processed; /* So we know when to refresh */
4937
4938                 /* Initial frame - setup */
4939                 if (rxr->fmp == NULL) {
4940                         mh->m_pkthdr.len = mh->m_len;
4941                         /* Save the head of the chain */
4942                         rxr->fmp = mh;
4943                         rxr->lmp = mh;
4944                         if (mp != NULL) {
4945                                 /* Add payload if split */
4946                                 mh->m_pkthdr.len += mp->m_len;
4947                                 rxr->lmp = mh->m_next;
4948                         }
4949                 } else {
4950                         /* Chain mbuf's together */
4951                         rxr->lmp->m_next = mh;
4952                         rxr->lmp = rxr->lmp->m_next;
4953                         rxr->fmp->m_pkthdr.len += mh->m_len;
4954                 }
4955
4956                 if (eop) {
4957                         rxr->fmp->m_pkthdr.rcvif = ifp;
4958                         ifp->if_ipackets++;
4959                         rxr->rx_packets++;
4960                         /* capture data for AIM */
4961                         rxr->packets++;
4962                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4963                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4964
4965                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4966                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4967
4968                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4969                             (staterr & E1000_RXD_STAT_VP) != 0) {
4970                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4971                                 rxr->fmp->m_flags |= M_VLANTAG;
4972                         }
4973
4974                         /*
4975                          * In case of multiqueue, we have RXCSUM.PCSD bit set
4976                          * and never cleared. This means we have RSS hash
4977                          * available to be used.
4978                          */
4979                         if (adapter->num_queues > 1) {
4980                                 rxr->fmp->m_pkthdr.flowid = 
4981                                     le32toh(cur->wb.lower.hi_dword.rss);
4982                                 /*
4983                                  * Full RSS support is not available in
4984                                  * FreeBSD 10, so set the hash type to
4985                                  * OPAQUE.
4986                                  */
4987                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4988                         } else {
4989 #ifndef IGB_LEGACY_TX
4990                                 rxr->fmp->m_pkthdr.flowid = que->msix;
4991                                 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4992 #endif
4993                         }
4994                         sendmp = rxr->fmp;
4995                         /* Make sure to set M_PKTHDR. */
4996                         sendmp->m_flags |= M_PKTHDR;
4997                         rxr->fmp = NULL;
4998                         rxr->lmp = NULL;
4999                 }
5000
5001 next_desc:
5002                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5003                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5004
5005                 /* Advance our pointers to the next descriptor. */
5006                 if (++i == adapter->num_rx_desc)
5007                         i = 0;
5008                 /*
5009                 ** Send to the stack or LRO
5010                 */
5011                 if (sendmp != NULL) {
5012                         rxr->next_to_check = i;
5013                         igb_rx_input(rxr, ifp, sendmp, ptype);
5014                         i = rxr->next_to_check;
5015                         rxdone++;
5016                 }
5017
5018                 /* Every 8 descriptors we go to refresh mbufs */
5019                 if (processed == 8) {
5020                         igb_refresh_mbufs(rxr, i);
5021                         processed = 0;
5022                 }
5023         }
5024
5025         /* Catch any remainders */
5026         if (igb_rx_unrefreshed(rxr))
5027                 igb_refresh_mbufs(rxr, i);
5028
5029         rxr->next_to_check = i;
5030
5031         /*
5032          * Flush any outstanding LRO work
5033          */
5034         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5035                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5036                 tcp_lro_flush(lro, queued);
5037         }
5038
5039         if (done != NULL)
5040                 *done += rxdone;
5041
5042         IGB_RX_UNLOCK(rxr);
5043         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5044 }
5045
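/*
 * Illustrative sketch (hypothetical helper): igb_rxeof() above stops
 * at the first descriptor without the DD (descriptor done) bit, and
 * only a descriptor with EOP set completes a frame; the segments of a
 * multi-descriptor frame are chained via fmp/lmp until EOP arrives.
 */
static inline bool
igb_example_desc_done(union e1000_adv_rx_desc *rxd, bool *eop)
{
	u32 staterr = le32toh(rxd->wb.upper.status_error);

	*eop = ((staterr & E1000_RXD_STAT_EOP) != 0);
	return ((staterr & E1000_RXD_STAT_DD) != 0);
}
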
5046 /*********************************************************************
5047  *
5048  *  Verify that the hardware indicated that the checksum is valid.
5049  *  Inform the stack about the status of checksum so that stack
5050  *  doesn't spend time verifying the checksum.
5051  *
5052  *********************************************************************/
5053 static void
5054 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5055 {
5056         u16 status = (u16)staterr;
5057         u8  errors = (u8) (staterr >> 24);
5058         int sctp;
5059
5060         /* Ignore Checksum bit is set */
5061         if (status & E1000_RXD_STAT_IXSM) {
5062                 mp->m_pkthdr.csum_flags = 0;
5063                 return;
5064         }
5065
5066         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5067             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5068                 sctp = 1;
5069         else
5070                 sctp = 0;
5071         if (status & E1000_RXD_STAT_IPCS) {
5072                 /* Did it pass? */
5073                 if (!(errors & E1000_RXD_ERR_IPE)) {
5074                         /* IP Checksum Good */
5075                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5076                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5077                 } else
5078                         mp->m_pkthdr.csum_flags = 0;
5079         }
5080
5081         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5082                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5083 #if __FreeBSD_version >= 800000
5084                 if (sctp) /* reassign */
5085                         type = CSUM_SCTP_VALID;
5086 #endif
5087                 /* Did it pass? */
5088                 if (!(errors & E1000_RXD_ERR_TCPE)) {
5089                         mp->m_pkthdr.csum_flags |= type;
5090                         if (sctp == 0)
5091                                 mp->m_pkthdr.csum_data = htons(0xffff);
5092                 }
5093         }
5094         return;
5095 }
5096
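/*
 * Illustrative sketch (hypothetical helper): the 32-bit staterr word
 * passed to igb_rx_checksum() above carries the status bits in its
 * low 16 bits and the error bits in bits 24-31, which is what the
 * two casts at the top of that function extract.
 */
static inline void
igb_example_split_staterr(u32 staterr, u16 *status, u8 *errors)
{
	*status = (u16)staterr;		/* E1000_RXD_STAT_* bits */
	*errors = (u8)(staterr >> 24);	/* E1000_RXD_ERR_* bits */
}
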
5097 /*
5098  * This routine is run via a VLAN
5099  * config EVENT
5100  */
5101 static void
5102 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5103 {
5104         struct adapter  *adapter = ifp->if_softc;
5105         u32             index, bit;
5106
5107         if (ifp->if_softc !=  arg)   /* Not our event */
5108                 return;
5109
5110         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5111                 return;
5112
5113         IGB_CORE_LOCK(adapter);
5114         index = (vtag >> 5) & 0x7F;
5115         bit = vtag & 0x1F;
5116         adapter->shadow_vfta[index] |= (1 << bit);
5117         ++adapter->num_vlans;
5118         /* Change hw filter setting */
5119         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5120                 igb_setup_vlan_hw_support(adapter);
5121         IGB_CORE_UNLOCK(adapter);
5122 }
5123
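/*
 * Worked example of the shadow VFTA indexing used above (hypothetical
 * helper): the 4096-entry VLAN filter table is kept as 128 32-bit
 * words, so VLAN ID 1234 lands in word (1234 >> 5) & 0x7F = 38,
 * bit 1234 & 0x1F = 18.
 */
static inline void
igb_example_vfta_locate(u16 vtag, u32 *index, u32 *bit)
{
	*index = (vtag >> 5) & 0x7F;	/* which 32-bit word */
	*bit = vtag & 0x1F;		/* which bit within that word */
}
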
5124 /*
5125  * This routine is run via a VLAN
5126  * unconfig EVENT
5127  */
5128 static void
5129 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5130 {
5131         struct adapter  *adapter = ifp->if_softc;
5132         u32             index, bit;
5133
5134         if (ifp->if_softc !=  arg)
5135                 return;
5136
5137         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5138                 return;
5139
5140         IGB_CORE_LOCK(adapter);
5141         index = (vtag >> 5) & 0x7F;
5142         bit = vtag & 0x1F;
5143         adapter->shadow_vfta[index] &= ~(1 << bit);
5144         --adapter->num_vlans;
5145         /* Change hw filter setting */
5146         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5147                 igb_setup_vlan_hw_support(adapter);
5148         IGB_CORE_UNLOCK(adapter);
5149 }
5150
5151 static void
5152 igb_setup_vlan_hw_support(struct adapter *adapter)
5153 {
5154         struct e1000_hw *hw = &adapter->hw;
5155         struct ifnet    *ifp = adapter->ifp;
5156         u32             reg;
5157
5158         if (adapter->vf_ifp) {
5159                 e1000_rlpml_set_vf(hw,
5160                     adapter->max_frame_size + VLAN_TAG_SIZE);
5161                 return;
5162         }
5163
5164         reg = E1000_READ_REG(hw, E1000_CTRL);
5165         reg |= E1000_CTRL_VME;
5166         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5167
5168         /* Enable the Filter Table */
5169         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5170                 reg = E1000_READ_REG(hw, E1000_RCTL);
5171                 reg &= ~E1000_RCTL_CFIEN;
5172                 reg |= E1000_RCTL_VFE;
5173                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5174         }
5175
5176         /* Update the frame size */
5177         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5178             adapter->max_frame_size + VLAN_TAG_SIZE);
5179
5180         /* Don't bother with table if no vlans */
5181         if ((adapter->num_vlans == 0) ||
5182             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5183                 return;
5184         /*
5185         ** A soft reset zeroes out the VFTA, so
5186         ** we need to repopulate it now.
5187         */
5188         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5189                 if (adapter->shadow_vfta[i] != 0) {
5190                         if (adapter->vf_ifp)
5191                                 e1000_vfta_set_vf(hw,
5192                                     adapter->shadow_vfta[i], TRUE);
5193                         else
5194                                 e1000_write_vfta(hw,
5195                                     i, adapter->shadow_vfta[i]);
5196                 }
5197 }
5198
5199 static void
5200 igb_enable_intr(struct adapter *adapter)
5201 {
5202         /* With RSS set up what to auto clear */
5203         if (adapter->msix_mem) {
5204                 u32 mask = (adapter->que_mask | adapter->link_mask);
5205                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5206                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5207                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5208                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5209                     E1000_IMS_LSC);
5210         } else {
5211                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5212                     IMS_ENABLE_MASK);
5213         }
5214         E1000_WRITE_FLUSH(&adapter->hw);
5215
5216         return;
5217 }
5218
5219 static void
5220 igb_disable_intr(struct adapter *adapter)
5221 {
5222         if (adapter->msix_mem) {
5223                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5224                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5225         } 
5226         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5227         E1000_WRITE_FLUSH(&adapter->hw);
5228         return;
5229 }
5230
5231 /*
5232  * Something of a misnomer: what this really means is
5233  * to enable OS management of the system, i.e. to
5234  * disable the special hardware management features.
5235  */
5236 static void
5237 igb_init_manageability(struct adapter *adapter)
5238 {
5239         if (adapter->has_manage) {
5240                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5241                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5242
5243                 /* disable hardware interception of ARP */
5244                 manc &= ~(E1000_MANC_ARP_EN);
5245
5246                 /* enable receiving management packets to the host */
5247                 manc |= E1000_MANC_EN_MNG2HOST;
5248                 manc2h |= 1 << 5;  /* Mng Port 623 */
5249                 manc2h |= 1 << 6;  /* Mng Port 664 */
5250                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5251                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5252         }
5253 }
5254
5255 /*
5256  * Give control back to hardware management
5257  * controller if there is one.
5258  */
5259 static void
5260 igb_release_manageability(struct adapter *adapter)
5261 {
5262         if (adapter->has_manage) {
5263                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5264
5265                 /* re-enable hardware interception of ARP */
5266                 manc |= E1000_MANC_ARP_EN;
5267                 manc &= ~E1000_MANC_EN_MNG2HOST;
5268
5269                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5270         }
5271 }
5272
5273 /*
5274  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5275  * For ASF and Pass Through versions of f/w this means that
5276  * the driver is loaded. 
5277  *
5278  */
5279 static void
5280 igb_get_hw_control(struct adapter *adapter)
5281 {
5282         u32 ctrl_ext;
5283
5284         if (adapter->vf_ifp)
5285                 return;
5286
5287         /* Let firmware know the driver has taken over */
5288         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5289         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5290             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5291 }
5292
5293 /*
5294  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5295  * For ASF and Pass Through versions of f/w this means that the
5296  * driver is no longer loaded.
5297  *
5298  */
5299 static void
5300 igb_release_hw_control(struct adapter *adapter)
5301 {
5302         u32 ctrl_ext;
5303
5304         if (adapter->vf_ifp)
5305                 return;
5306
5307         /* Let firmware take over control of h/w */
5308         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5309         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5310             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5311 }
5312
5313 static int
5314 igb_is_valid_ether_addr(uint8_t *addr)
5315 {
5316         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5317
5318         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5319                 return (FALSE);
5320         }
5321
5322         return (TRUE);
5323 }
5324
5325
5326 /*
5327  * Enable PCI Wake On Lan capability
5328  */
5329 static void
5330 igb_enable_wakeup(device_t dev)
5331 {
5332         u16     cap, status;
5333         u8      id;
5334
5335         /* First find the capabilities pointer */
5336         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5337         /* Read the PM Capabilities */
5338         id = pci_read_config(dev, cap, 1);
5339         if (id != PCIY_PMG)     /* Something wrong */
5340                 return;
5341         /* OK, we have the power capabilities, so
5342            now get the status register */
5343         cap += PCIR_POWER_STATUS;
5344         status = pci_read_config(dev, cap, 2);
5345         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5346         pci_write_config(dev, cap, status, 2);
5347         return;
5348 }
5349
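/*
 * Aside (an assumption on our part, not a change to the driver):
 * igb_enable_wakeup() above reads only the first entry of the PCI
 * capability list.  A sketch of the same PME enable using the stock
 * pci_find_cap(9) walker, which searches the whole list:
 */
static inline void
igb_example_enable_pme(device_t dev)
{
	int cap;
	u16 status;

	if (pci_find_cap(dev, PCIY_PMG, &cap) != 0)
		return;		/* no Power Management capability */
	status = pci_read_config(dev, cap + PCIR_POWER_STATUS, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, cap + PCIR_POWER_STATUS, status, 2);
}
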
5350 static void
5351 igb_led_func(void *arg, int onoff)
5352 {
5353         struct adapter  *adapter = arg;
5354
5355         IGB_CORE_LOCK(adapter);
5356         if (onoff) {
5357                 e1000_setup_led(&adapter->hw);
5358                 e1000_led_on(&adapter->hw);
5359         } else {
5360                 e1000_led_off(&adapter->hw);
5361                 e1000_cleanup_led(&adapter->hw);
5362         }
5363         IGB_CORE_UNLOCK(adapter);
5364 }
5365
5366 /**********************************************************************
5367  *
5368  *  Update the board statistics counters.
5369  *
5370  **********************************************************************/
5371 static void
5372 igb_update_stats_counters(struct adapter *adapter)
5373 {
5374         struct ifnet            *ifp;
5375         struct e1000_hw         *hw = &adapter->hw;
5376         struct e1000_hw_stats   *stats;
5377
5378         /*
5379         ** The virtual function adapter has only a
5380         ** small, controlled set of stats; do only
5381         ** those and return.
5382         */
5383         if (adapter->vf_ifp) {
5384                 igb_update_vf_stats_counters(adapter);
5385                 return;
5386         }
5387
5388         stats = (struct e1000_hw_stats  *)adapter->stats;
5389
5390         if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5391            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5392                 stats->symerrs +=
5393                     E1000_READ_REG(hw,E1000_SYMERRS);
5394                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5395         }
5396
5397         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5398         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5399         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5400         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5401
5402         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5403         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5404         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5405         stats->dc += E1000_READ_REG(hw, E1000_DC);
5406         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5407         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5408         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5409         /*
5410         ** For watchdog management we need to know if we have been
5411         ** paused during the last interval, so capture that here.
5412         */ 
5413         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5414         stats->xoffrxc += adapter->pause_frames;
5415         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5416         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5417         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5418         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5419         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5420         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5421         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5422         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5423         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5424         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5425         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5426         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5427
5428         /* For the 64-bit byte counters the low dword must be read first. */
5429         /* Both registers clear on the read of the high dword */
5430
5431         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5432             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5433         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5434             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5435
5436         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5437         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5438         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5439         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5440         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5441
5442         stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5443         stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5444         stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5445
5446         stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5447             ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5448         stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5449             ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5450
5451         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5452         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5453         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5454         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5455         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5456         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5457         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5458         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5459         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5460         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5461
5462         /* Interrupt Counts */
5463
5464         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5465         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5466         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5467         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5468         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5469         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5470         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5471         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5472         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5473
5474         /* Host to Card Statistics */
5475
5476         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5477         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5478         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5479         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5480         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5481         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5482         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5483         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5484             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5485         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5486             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5487         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5488         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5489         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5490
5491         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5492         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5493         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5494         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5495         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5496         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5497
5498         ifp = adapter->ifp;
5499         ifp->if_collisions = stats->colc;
5500
5501         /* Rx Errors */
5502         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5503             stats->crcerrs + stats->algnerrc +
5504             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5505
5506         /* Tx Errors */
5507         ifp->if_oerrors = stats->ecol +
5508             stats->latecol + adapter->watchdog_events;
5509
5510         /* Driver specific counters */
5511         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5512         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5513         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5514         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5515         adapter->packet_buf_alloc_tx =
5516             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5517         adapter->packet_buf_alloc_rx =
5518             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5519 }
5520
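/*
 * Illustrative sketch (hypothetical helper) of the 64-bit counter
 * access pattern noted in igb_update_stats_counters() above: the low
 * dword must be read first, and both registers clear on the read of
 * the high dword, so each sample is an increment to accumulate.
 */
static inline u64
igb_example_read_counter64(struct e1000_hw *hw, u32 lo_reg, u32 hi_reg)
{
	u64 val;

	val = E1000_READ_REG(hw, lo_reg);		/* low dword first */
	val |= (u64)E1000_READ_REG(hw, hi_reg) << 32;	/* clears the pair */
	return (val);
}
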
5521
5522 /**********************************************************************
5523  *
5524  *  Initialize the VF board statistics counters.
5525  *
5526  **********************************************************************/
5527 static void
5528 igb_vf_init_stats(struct adapter *adapter)
5529 {
5530         struct e1000_hw *hw = &adapter->hw;
5531         struct e1000_vf_stats   *stats;
5532
5533         stats = (struct e1000_vf_stats  *)adapter->stats;
5534         if (stats == NULL)
5535                 return;
5536         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5537         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5538         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5539         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5540         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5541 }
5542  
5543 /**********************************************************************
5544  *
5545  *  Update the VF board statistics counters.
5546  *
5547  **********************************************************************/
5548 static void
5549 igb_update_vf_stats_counters(struct adapter *adapter)
5550 {
5551         struct e1000_hw *hw = &adapter->hw;
5552         struct e1000_vf_stats   *stats;
5553
5554         if (adapter->link_speed == 0)
5555                 return;
5556
5557         stats = (struct e1000_vf_stats  *)adapter->stats;
5558
5559         UPDATE_VF_REG(E1000_VFGPRC,
5560             stats->last_gprc, stats->gprc);
5561         UPDATE_VF_REG(E1000_VFGORC,
5562             stats->last_gorc, stats->gorc);
5563         UPDATE_VF_REG(E1000_VFGPTC,
5564             stats->last_gptc, stats->gptc);
5565         UPDATE_VF_REG(E1000_VFGOTC,
5566             stats->last_gotc, stats->gotc);
5567         UPDATE_VF_REG(E1000_VFMPRC,
5568             stats->last_mprc, stats->mprc);
5569 }
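/*
 * UPDATE_VF_REG (defined in if_igb.h) accumulates each VF counter
 * from its 32-bit hardware register. A minimal sketch of the usual
 * wrap-tracking pattern such a macro follows (an assumption; see the
 * header for the actual definition):
 *
 *      u32 cur = E1000_READ_REG(hw, reg);
 *      if (cur < last)                 // 32-bit counter wrapped
 *              stat += 0x100000000ULL;
 *      stat = (stat & 0xFFFFFFFF00000000ULL) | cur;
 *      last = cur;
 */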
5570
5571 /* Export a single 32-bit register via a read-only sysctl. */
5572 static int
5573 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5574 {
5575         struct adapter *adapter;
5576         u_int val;
5577
5578         adapter = oidp->oid_arg1;
5579         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5580         return (sysctl_handle_int(oidp, &val, 0, req));
5581 }
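/*
 * The handler above expects arg1 = the adapter and arg2 = the register
 * offset, matching how it is hooked up in igb_add_hw_stats() below,
 * e.g.:
 *
 *      SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 *          CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
 *          igb_sysctl_reg_handler, "IU", "Transmit Descriptor Head");
 *
 * From userland the value is then readable as, for instance,
 * "sysctl dev.igb.0.queue0.txd_head" (unit and queue numbers vary).
 */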
5582
5583 /*
5584 **  Tuneable interrupt rate handler
5585 */
5586 static int
5587 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5588 {
5589         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5590         int                     error;
5591         u32                     reg, usec, rate;
5592                         
5593         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5594         usec = ((reg & 0x7FFC) >> 2);
5595         if (usec > 0)
5596                 rate = 1000000 / usec;
5597         else
5598                 rate = 0;
5599         error = sysctl_handle_int(oidp, &rate, 0, req);
5600         if (error || !req->newptr)
5601                 return (error);
5602         return (0);
5603 }
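/*
 * Worked example of the EITR math above: the code treats bits 14:2 of
 * the register as an interval in microseconds. If that field holds
 * 125, then usec = (reg & 0x7FFC) >> 2 = 125 and the reported rate is
 * 1000000 / 125 = 8000 interrupts per second; an interval of 0
 * reports a rate of 0.
 */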
5604
5605 /*
5606  * Add sysctl variables, one per statistic, to the system.
5607  */
5608 static void
5609 igb_add_hw_stats(struct adapter *adapter)
5610 {
5611         device_t dev = adapter->dev;
5612
5613         struct tx_ring *txr = adapter->tx_rings;
5614         struct rx_ring *rxr = adapter->rx_rings;
5615
5616         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5617         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5618         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5619         struct e1000_hw_stats *stats = adapter->stats;
5620
5621         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5622         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5623
5624 #define QUEUE_NAME_LEN 32
5625         char namebuf[QUEUE_NAME_LEN];
5626
5627         /* Driver Statistics */
5628         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", 
5629                         CTLFLAG_RD, &adapter->link_irq,
5630                         "Link MSIX IRQ Handled");
5631         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5632                         CTLFLAG_RD, &adapter->dropped_pkts,
5633                         "Driver dropped packets");
5634         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5635                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5636                         "Driver tx dma failure in xmit");
5637         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5638                         CTLFLAG_RD, &adapter->rx_overruns,
5639                         "RX overruns");
5640         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5641                         CTLFLAG_RD, &adapter->watchdog_events,
5642                         "Watchdog timeouts");
5643
5644         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5645                         CTLFLAG_RD, &adapter->device_control,
5646                         "Device Control Register");
5647         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5648                         CTLFLAG_RD, &adapter->rx_control,
5649                         "Receiver Control Register");
5650         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5651                         CTLFLAG_RD, &adapter->int_mask,
5652                         "Interrupt Mask");
5653         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5654                         CTLFLAG_RD, &adapter->eint_mask,
5655                         "Extended Interrupt Mask");
5656         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5657                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5658                         "Transmit Buffer Packet Allocation");
5659         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5660                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5661                         "Receive Buffer Packet Allocation");
5662         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5663                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5664                         "Flow Control High Watermark");
5665         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5666                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5667                         "Flow Control Low Watermark");
5668
5669         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5670                 struct lro_ctrl *lro = &rxr->lro;
5671
5672                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5673                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5674                                             CTLFLAG_RD, NULL, "Queue Name");
5675                 queue_list = SYSCTL_CHILDREN(queue_node);
5676
5677                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5678                                 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5679                                 sizeof(&adapter->queues[i]),
5680                                 igb_sysctl_interrupt_rate_handler,
5681                                 "IU", "Interrupt Rate");
5682
5683                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5684                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5685                                 igb_sysctl_reg_handler, "IU",
5686                                 "Transmit Descriptor Head");
5687                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5688                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5689                                 igb_sysctl_reg_handler, "IU",
5690                                 "Transmit Descriptor Tail");
5691                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5692                                 CTLFLAG_RD, &txr->no_desc_avail,
5693                                 "Queue No Descriptor Available");
5694                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5695                                 CTLFLAG_RD, &txr->total_packets,
5696                                 "Queue Packets Transmitted");
5697
5698                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5699                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5700                                 igb_sysctl_reg_handler, "IU",
5701                                 "Receive Descriptor Head");
5702                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5703                                 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5704                                 igb_sysctl_reg_handler, "IU",
5705                                 "Receive Descriptor Tail");
5706                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5707                                 CTLFLAG_RD, &rxr->rx_packets,
5708                                 "Queue Packets Received");
5709                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5710                                 CTLFLAG_RD, &rxr->rx_bytes,
5711                                 "Queue Bytes Received");
5712                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5713                                 CTLFLAG_RD, &lro->lro_queued, 0,
5714                                 "LRO Queued");
5715                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5716                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5717                                 "LRO Flushed");
5718         }
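        /*
         * The loop above creates one node per queue, e.g.
         * dev.igb.0.queue0, each carrying interrupt_rate,
         * txd_head/txd_tail, rxd_head/rxd_tail, the packet and byte
         * counters, and the LRO statistics.
         */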
5719
5720         /* MAC stats get their own sub node */
5721
5722         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5723                                     CTLFLAG_RD, NULL, "MAC Statistics");
5724         stat_list = SYSCTL_CHILDREN(stat_node);
5725
5726         /*
5727         ** The VF adapter has a very limited set of stats
5728         ** since it's not managing the metal, so to speak.
5729         */
5730         if (adapter->vf_ifp) {
5731                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5732                                 CTLFLAG_RD, &stats->gprc,
5733                                 "Good Packets Received");
5734                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5735                                 CTLFLAG_RD, &stats->gptc,
5736                                 "Good Packets Transmitted");
5737                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5738                                 CTLFLAG_RD, &stats->gorc,
5739                                 "Good Octets Received");
5740                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5741                                 CTLFLAG_RD, &stats->gotc,
5742                                 "Good Octets Transmitted");
5743                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5744                                 CTLFLAG_RD, &stats->mprc,
5745                                 "Multicast Packets Received");
5746                 return;
5747         }
5748
5749         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5750                         CTLFLAG_RD, &stats->ecol,
5751                         "Excessive collisions");
5752         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5753                         CTLFLAG_RD, &stats->scc,
5754                         "Single collisions");
5755         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5756                         CTLFLAG_RD, &stats->mcc,
5757                         "Multiple collisions");
5758         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5759                         CTLFLAG_RD, &stats->latecol,
5760                         "Late collisions");
5761         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5762                         CTLFLAG_RD, &stats->colc,
5763                         "Collision Count");
5764         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5765                         CTLFLAG_RD, &stats->symerrs,
5766                         "Symbol Errors");
5767         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5768                         CTLFLAG_RD, &stats->sec,
5769                         "Sequence Errors");
5770         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5771                         CTLFLAG_RD, &stats->dc,
5772                         "Defer Count");
5773         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5774                         CTLFLAG_RD, &stats->mpc,
5775                         "Missed Packets");
5776         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5777                         CTLFLAG_RD, &stats->rlec,
5778                         "Receive Length Errors");
5779         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5780                         CTLFLAG_RD, &stats->rnbc,
5781                         "Receive No Buffers");
5782         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5783                         CTLFLAG_RD, &stats->ruc,
5784                         "Receive Undersize");
5785         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5786                         CTLFLAG_RD, &stats->rfc,
5787                         "Fragmented Packets Received");
5788         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5789                         CTLFLAG_RD, &stats->roc,
5790                         "Oversized Packets Received");
5791         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5792                         CTLFLAG_RD, &stats->rjc,
5793                         "Received Jabber");
5794         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5795                         CTLFLAG_RD, &stats->rxerrc,
5796                         "Receive Errors");
5797         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5798                         CTLFLAG_RD, &stats->crcerrs,
5799                         "CRC errors");
5800         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5801                         CTLFLAG_RD, &stats->algnerrc,
5802                         "Alignment Errors");
5803         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
5804                         CTLFLAG_RD, &stats->tncrs,
5805                         "Transmit with No CRS");
5806         /* On 82575 these are collision counts */
5807         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5808                         CTLFLAG_RD, &stats->cexterr,
5809                         "Collision/Carrier extension errors");
5810         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5811                         CTLFLAG_RD, &stats->xonrxc,
5812                         "XON Received");
5813         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5814                         CTLFLAG_RD, &stats->xontxc,
5815                         "XON Transmitted");
5816         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5817                         CTLFLAG_RD, &stats->xoffrxc,
5818                         "XOFF Received");
5819         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5820                         CTLFLAG_RD, &stats->xofftxc,
5821                         "XOFF Transmitted");
5822         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
5823                         CTLFLAG_RD, &stats->fcruc,
5824                         "Unsupported Flow Control Received");
5825         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
5826                         CTLFLAG_RD, &stats->mgprc,
5827                         "Management Packets Received");
5828         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
5829                         CTLFLAG_RD, &stats->mgpdc,
5830                         "Management Packets Dropped");
5831         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
5832                         CTLFLAG_RD, &stats->mgptc,
5833                         "Management Packets Transmitted");
5834         /* Packet Reception Stats */
5835         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5836                         CTLFLAG_RD, &stats->tpr,
5837                         "Total Packets Received");
5838         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5839                         CTLFLAG_RD, &stats->gprc,
5840                         "Good Packets Received");
5841         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5842                         CTLFLAG_RD, &stats->bprc,
5843                         "Broadcast Packets Received");
5844         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5845                         CTLFLAG_RD, &stats->mprc,
5846                         "Multicast Packets Received");
5847         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5848                         CTLFLAG_RD, &stats->prc64,
5849                         "64 byte frames received");
5850         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5851                         CTLFLAG_RD, &stats->prc127,
5852                         "65-127 byte frames received");
5853         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5854                         CTLFLAG_RD, &stats->prc255,
5855                         "128-255 byte frames received");
5856         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5857                         CTLFLAG_RD, &stats->prc511,
5858                         "256-511 byte frames received");
5859         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5860                         CTLFLAG_RD, &stats->prc1023,
5861                         "512-1023 byte frames received");
5862         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5863                         CTLFLAG_RD, &stats->prc1522,
5864                         "1024-1522 byte frames received");
5865         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5866                         CTLFLAG_RD, &stats->gorc, 
5867                         "Good Octets Received");
5868         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", 
5869                         CTLFLAG_RD, &stats->tor, 
5870                         "Total Octets Received");
5871
5872         /* Packet Transmission Stats */
5873         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5874                         CTLFLAG_RD, &stats->gotc, 
5875                         "Good Octets Transmitted"); 
5876         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", 
5877                         CTLFLAG_RD, &stats->tot, 
5878                         "Total Octets Transmitted");
5879         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5880                         CTLFLAG_RD, &stats->tpt,
5881                         "Total Packets Transmitted");
5882         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5883                         CTLFLAG_RD, &stats->gptc,
5884                         "Good Packets Transmitted");
5885         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5886                         CTLFLAG_RD, &stats->bptc,
5887                         "Broadcast Packets Transmitted");
5888         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5889                         CTLFLAG_RD, &stats->mptc,
5890                         "Multicast Packets Transmitted");
5891         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5892                         CTLFLAG_RD, &stats->ptc64,
5893                         "64 byte frames transmitted");
5894         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5895                         CTLFLAG_RD, &stats->ptc127,
5896                         "65-127 byte frames transmitted");
5897         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5898                         CTLFLAG_RD, &stats->ptc255,
5899                         "128-255 byte frames transmitted");
5900         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5901                         CTLFLAG_RD, &stats->ptc511,
5902                         "256-511 byte frames transmitted");
5903         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5904                         CTLFLAG_RD, &stats->ptc1023,
5905                         "512-1023 byte frames transmitted");
5906         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5907                         CTLFLAG_RD, &stats->ptc1522,
5908                         "1024-1522 byte frames transmitted");
5909         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5910                         CTLFLAG_RD, &stats->tsctc,
5911                         "TSO Contexts Transmitted");
5912         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5913                         CTLFLAG_RD, &stats->tsctfc,
5914                         "TSO Contexts Failed");
5915
5916
5917         /* Interrupt Stats */
5918
5919         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5920                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5921         int_list = SYSCTL_CHILDREN(int_node);
5922
5923         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5924                         CTLFLAG_RD, &stats->iac,
5925                         "Interrupt Assertion Count");
5926
5927         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5928                         CTLFLAG_RD, &stats->icrxptc,
5929                         "Interrupt Cause Rx Pkt Timer Expire Count");
5930
5931         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5932                         CTLFLAG_RD, &stats->icrxatc,
5933                         "Interrupt Cause Rx Abs Timer Expire Count");
5934
5935         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5936                         CTLFLAG_RD, &stats->ictxptc,
5937                         "Interrupt Cause Tx Pkt Timer Expire Count");
5938
5939         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5940                         CTLFLAG_RD, &stats->ictxatc,
5941                         "Interrupt Cause Tx Abs Timer Expire Count");
5942
5943         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5944                         CTLFLAG_RD, &stats->ictxqec,
5945                         "Interrupt Cause Tx Queue Empty Count");
5946
5947         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5948                         CTLFLAG_RD, &stats->ictxqmtc,
5949                         "Interrupt Cause Tx Queue Min Thresh Count");
5950
5951         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5952                         CTLFLAG_RD, &stats->icrxdmtc,
5953                         "Interrupt Cause Rx Desc Min Thresh Count");
5954
5955         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5956                         CTLFLAG_RD, &stats->icrxoc,
5957                         "Interrupt Cause Receiver Overrun Count");
5958
5959         /* Host to Card Stats */
5960
5961         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5962                                     CTLFLAG_RD, NULL, 
5963                                     "Host to Card Statistics");
5964
5965         host_list = SYSCTL_CHILDREN(host_node);
5966
5967         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5968                         CTLFLAG_RD, &stats->cbtmpc,
5969                         "Circuit Breaker Tx Packet Count");
5970
5971         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5972                         CTLFLAG_RD, &stats->htdpmc,
5973                         "Host Transmit Discarded Packets");
5974
5975         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5976                         CTLFLAG_RD, &stats->rpthc,
5977                         "Rx Packets To Host");
5978
5979         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5980                         CTLFLAG_RD, &stats->cbrmpc,
5981                         "Circuit Breaker Rx Packet Count");
5982
5983         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5984                         CTLFLAG_RD, &stats->cbrdpc,
5985                         "Circuit Breaker Rx Dropped Count");
5986
5987         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5988                         CTLFLAG_RD, &stats->hgptc,
5989                         "Host Good Packets Tx Count");
5990
5991         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5992                         CTLFLAG_RD, &stats->htcbdpc,
5993                         "Host Tx Circuit Breaker Dropped Count");
5994
5995         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5996                         CTLFLAG_RD, &stats->hgorc,
5997                         "Host Good Octets Received Count");
5998
5999         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6000                         CTLFLAG_RD, &stats->hgotc,
6001                         "Host Good Octets Transmit Count");
6002
6003         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6004                         CTLFLAG_RD, &stats->lenerrs,
6005                         "Length Errors");
6006
6007         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6008                         CTLFLAG_RD, &stats->scvpc,
6009                         "SerDes/SGMII Code Violation Pkt Count");
6010
6011         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6012                         CTLFLAG_RD, &stats->hrmpc,
6013                         "Header Redirection Missed Packet Count");
6014 }
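/*
 * All of the statistics added above land under the device's sysctl
 * tree and can be inspected from userland, e.g. (unit number varies):
 *
 *      sysctl dev.igb.0.mac_stats
 *      sysctl dev.igb.0.interrupts.asserts
 *      sysctl dev.igb.0.host.rx_good_bytes
 */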
6015
6016
6017 /**********************************************************************
6018  *
6019  *  This routine provides a way to dump out the adapter eeprom,
6020  *  often a useful debug/service tool. It dumps only the first
6021  *  32 words; the data that matters lives within that range.
6022  *
6023  **********************************************************************/
6024 static int
6025 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6026 {
6027         struct adapter *adapter;
6028         int error;
6029         int result;
6030
6031         result = -1;
6032         error = sysctl_handle_int(oidp, &result, 0, req);
6033
6034         if (error || !req->newptr)
6035                 return (error);
6036
6037         /*
6038          * This value will cause a hex dump of the
6039          * first 32 16-bit words of the EEPROM to
6040          * the screen.
6041          */
6042         if (result == 1) {
6043                 adapter = (struct adapter *)arg1;
6044                 igb_print_nvm_info(adapter);
6045         }
6046
6047         return (error);
6048 }
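/*
 * Writing 1 to the sysctl backed by this handler triggers the dump;
 * any other value is ignored. Assuming the handler is attached under
 * the name "nvm" (done at attach time, outside this excerpt):
 *
 *      sysctl dev.igb.0.nvm=1
 */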
6049
6050 static void
6051 igb_print_nvm_info(struct adapter *adapter)
6052 {
6053         u16     eeprom_data;
6054         int     i, j, row = 0;
6055
6056         /* It's a bit crude, but it gets the job done */
6057         printf("\nInterface EEPROM Dump:\n");
6058         printf("Offset\n0x0000  ");
6059         for (i = 0, j = 0; i < 32; i++, j++) {
6060                 if (j == 8) { /* Make the offset block */
6061                         j = 0; ++row;
6062                         printf("\n0x00%x0  ",row);
6063                 }
6064                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6065                 printf("%04x ", eeprom_data);
6066         }
6067         printf("\n");
6068 }
6069
6070 static void
6071 igb_set_sysctl_value(struct adapter *adapter, const char *name,
6072         const char *description, int *limit, int value)
6073 {
6074         *limit = value;
6075         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6076             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6077             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6078 }
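/*
 * A typical use of this helper (a hypothetical example; the actual
 * call sites are in the attach path, outside this excerpt): expose a
 * tunable receive processing limit as a read/write integer sysctl:
 *
 *      igb_set_sysctl_value(adapter, "rx_processing_limit",
 *          "max number of rx packets to process",
 *          &adapter->rx_process_limit, 100);
 */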
6079
6080 /*
6081 ** Set flow control using sysctl:
6082 ** Flow control values:
6083 **      0 - off
6084 **      1 - rx pause
6085 **      2 - tx pause
6086 **      3 - full
6087 */
6088 static int
6089 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6090 {
6091         struct adapter  *adapter = (struct adapter *) arg1;
6092         int             error;
6093         int             input = adapter->fc; /* report the current mode */
6094
6095         error = sysctl_handle_int(oidp, &input, 0, req);
6096
6097         if ((error) || (req->newptr == NULL))
6098                 return (error);
6099
6100         switch (input) {
6101                 case e1000_fc_rx_pause:
6102                 case e1000_fc_tx_pause:
6103                 case e1000_fc_full:
6104                 case e1000_fc_none:
6105                         adapter->hw.fc.requested_mode = input;
6106                         adapter->fc = input;
6107                         break;
6108                 default:
6109                         /* Do nothing */
6110                         return (error);
6111         }
6112
6113         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6114         e1000_force_mac_fc(&adapter->hw);
6115         /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6116         return (error);
6117 }
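/*
 * The e1000_fc_* values line up with the comment above (0 off, 1 rx
 * pause, 2 tx pause, 3 full). Assuming the handler is attached under
 * the name "fc" (done at attach time, outside this excerpt), full
 * flow control could be requested with:
 *
 *      sysctl dev.igb.0.fc=3
 */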
6118
6119 /*
6120 ** Manage DMA Coalesce:
6121 ** Control values:
6122 **      0/1 - off/on
6123 **      Legal timer values are:
6124 **      250, 500, and 1000-10000 in increments of 1000
6125 */
6126 static int
6127 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6128 {
6129         struct adapter *adapter = (struct adapter *) arg1;
6130         int             error;
6131
6132         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6133
6134         if ((error) || (req->newptr == NULL))
6135                 return (error);
6136
6137         switch (adapter->dmac) {
6138                 case 0:
6139                         /* Disabling */
6140                         break;
6141                 case 1: /* Just enable and use default */
6142                         adapter->dmac = 1000;
6143                         break;
6144                 case 250:
6145                 case 500:
6146                 case 1000:
6147                 case 2000:
6148                 case 3000:
6149                 case 4000:
6150                 case 5000:
6151                 case 6000:
6152                 case 7000:
6153                 case 8000:
6154                 case 9000:
6155                 case 10000:
6156                         /* Legal values - allow */
6157                         break;
6158                 default:
6159                         /* Do nothing, illegal value */
6160                         adapter->dmac = 0;
6161                         return (EINVAL);
6162         }
6163         /* Reinit the interface */
6164         igb_init(adapter);
6165         return (error);
6166 }
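/*
 * Example (assuming the handler is attached under the name "dmac",
 * done at attach time outside this excerpt): writing 1 enables
 * coalescing with the default value of 1000, an explicit legal value
 * such as 250 sets that timer directly, and anything else is rejected
 * with EINVAL, leaving coalescing off:
 *
 *      sysctl dev.igb.0.dmac=250
 */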
6167
6168 /*
6169 ** Manage Energy Efficient Ethernet:
6170 ** Control values:
6171 **     0/1 - enabled/disabled
6172 */
6173 static int
6174 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6175 {
6176         struct adapter  *adapter = (struct adapter *) arg1;
6177         int             error, value;
6178
6179         value = adapter->hw.dev_spec._82575.eee_disable;
6180         error = sysctl_handle_int(oidp, &value, 0, req);
6181         if (error || req->newptr == NULL)
6182                 return (error);
6183         IGB_CORE_LOCK(adapter);
6184         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6185         igb_init_locked(adapter);
6186         IGB_CORE_UNLOCK(adapter);
6187         return (0);
6188 }
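/*
 * Example (assuming the handler is attached under the name
 * "eee_disabled", done at attach time outside this excerpt): a write
 * of 1 disables EEE and a write of 0 re-enables it; either way the
 * interface is reinitialized:
 *
 *      sysctl dev.igb.0.eee_disabled=1
 */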