/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.3.9";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};
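
/*
 * A quick way to check whether a given NIC should match this table is
 * "pciconf -lv" on a running system, which reports each device's PCI
 * vendor ID (0x8086 for Intel) and device ID.
 */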

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#if __FreeBSD_version >= 800000
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static bool     igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, struct tcphdr *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
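
/*
 * Example: since these are CTLFLAG_RDTUN, they can only be set as
 * boot-time tunables, e.g. in /boot/loader.conf:
 *
 *     hw.igb.rxd="2048"
 *     hw.igb.txd="2048"
 *
 * Requested values are validated in igb_attach(); out-of-range or
 * misaligned values fall back to IGB_DEFAULT_RXD/IGB_DEFAULT_TXD.
 */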

/*
** AIM: Adaptive Interrupt Moderation,
** meaning the interrupt rate is varied
** over time based on the traffic seen
** on that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
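
/*
 * Note: enable_aim is CTLFLAG_RW, so unlike the boot-time tunables it
 * can also be toggled at runtime, e.g. "sysctl hw.igb.enable_aim=0".
 */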

/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#if __FreeBSD_version >= 800000
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf, using no cluster at all.  It is a very
** workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSI-X messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  The
         * count must not exceed the hardware maximum, and the ring size
         * in bytes must be a multiple of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;
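
        /*
         * A worked example of the alignment check above (assuming
         * 16-byte legacy descriptors and IGB_DBA_ALIGN = 128, as
         * defined in if_igb.h): hw.igb.txd="1020" gives
         * 1020 * 16 = 16320 bytes, which is not a multiple of 128,
         * so the driver would warn and fall back to IGB_DEFAULT_TXD.
         * Any multiple of 8 descriptors passes the alignment test.
         */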

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats = (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats = (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper)
                        e1000_set_eee_i350(&adapter->hw);
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address afterwards.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#if __FreeBSD_version >= 800000
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#if __FreeBSD_version < 800000

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX routine: called from the stack, it
 * always uses the first ring (tx_rings[0]) and
 * waits on that ring's lock.  It should not be
 * used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* __FreeBSD_version >= 800000 */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        enq = 0;

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it as the init
 *  entry point in the network interface structure, and the driver
 *  calls it as a hw/sw initialization routine to get back to a
 *  consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
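
        /*
         * For example, the standard 1500-byte MTU yields a
         * max_frame_size of 1518 (MTU + 14-byte header + 4-byte FCS)
         * and selects 2k MCLBYTES clusters, while a 9000-byte jumbo
         * MTU yields 9018 and selects MJUM9BYTES (9k) clusters.
         */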

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                e1000_set_eee_i350(&adapter->hw);
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}
1374
1375
1376 static void
1377 igb_handle_que(void *context, int pending)
1378 {
1379         struct igb_queue *que = context;
1380         struct adapter *adapter = que->adapter;
1381         struct tx_ring *txr = que->txr;
1382         struct ifnet    *ifp = adapter->ifp;
1383
1384         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1385                 bool    more;
1386
1387                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1388
1389                 IGB_TX_LOCK(txr);
1390                 igb_txeof(txr);
1391 #if __FreeBSD_version >= 800000
1392                 /* Process the stack queue only if not depleted */
1393                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1394                     !drbr_empty(ifp, txr->br))
1395                         igb_mq_start_locked(ifp, txr);
1396 #else
1397                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1398                         igb_start_locked(txr, ifp);
1399 #endif
1400                 IGB_TX_UNLOCK(txr);
1401                 /* Do we need another? */
1402                 if (more) {
1403                         taskqueue_enqueue(que->tq, &que->que_task);
1404                         return;
1405                 }
1406         }
1407
1408 #ifdef DEVICE_POLLING
1409         if (ifp->if_capenable & IFCAP_POLLING)
1410                 return;
1411 #endif
1412         /* Reenable this interrupt */
1413         if (que->eims)
1414                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1415         else
1416                 igb_enable_intr(adapter);
1417 }
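
/*
 * Editor's note: rather than looping until the RX ring is empty,
 * igb_handle_que() processes at most rx_process_limit descriptors per
 * invocation and re-enqueues itself on the taskqueue when more work
 * remains.  This bounds the time spent per pass so one busy queue
 * cannot monopolize the taskqueue thread.
 */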
1418
1419 /* Deal with link in a sleepable context */
1420 static void
1421 igb_handle_link(void *context, int pending)
1422 {
1423         struct adapter *adapter = context;
1424
1425         IGB_CORE_LOCK(adapter);
1426         igb_handle_link_locked(adapter);
1427         IGB_CORE_UNLOCK(adapter);
1428 }
1429
1430 static void
1431 igb_handle_link_locked(struct adapter *adapter)
1432 {
1433         struct tx_ring  *txr = adapter->tx_rings;
1434         struct ifnet *ifp = adapter->ifp;
1435
1436         IGB_CORE_LOCK_ASSERT(adapter);
1437         adapter->hw.mac.get_link_status = 1;
1438         igb_update_link_status(adapter);
1439         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1440                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1441                         IGB_TX_LOCK(txr);
1442 #if __FreeBSD_version >= 800000
1443                         /* Process the stack queue only if not depleted */
1444                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1445                             !drbr_empty(ifp, txr->br))
1446                                 igb_mq_start_locked(ifp, txr);
1447 #else
1448                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1449                                 igb_start_locked(txr, ifp);
1450 #endif
1451                         IGB_TX_UNLOCK(txr);
1452                 }
1453         }
1454 }
1455
1456 /*********************************************************************
1457  *
1458  *  MSI/Legacy Deferred
1459  *  Interrupt Service routine  
1460  *
1461  *********************************************************************/
1462 static int
1463 igb_irq_fast(void *arg)
1464 {
1465         struct adapter          *adapter = arg;
1466         struct igb_queue        *que = adapter->queues;
1467         u32                     reg_icr;
1468
1469
1470         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1471
1472         /* Hot eject?  */
1473         if (reg_icr == 0xffffffff)
1474                 return FILTER_STRAY;
1475
1476         /* Definitely not our interrupt.  */
1477         if (reg_icr == 0x0)
1478                 return FILTER_STRAY;
1479
1480         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1481                 return FILTER_STRAY;
1482
1483         /*
1484          * Mask interrupts until the taskqueue is finished running.  This is
1485          * cheap; just assume that it is needed.  This also works around the
1486          * MSI message reordering errata on certain systems.
1487          */
1488         igb_disable_intr(adapter);
1489         taskqueue_enqueue(que->tq, &que->que_task);
1490
1491         /* Link status change */
1492         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1493                 taskqueue_enqueue(que->tq, &adapter->link_task);
1494
1495         if (reg_icr & E1000_ICR_RXO)
1496                 adapter->rx_overruns++;
1497         return FILTER_HANDLED;
1498 }
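
/*
 * Editor's note: igb_irq_fast() runs as an interrupt filter in primary
 * interrupt context, so it may not sleep or take regular mutexes.  The
 * contract is: return FILTER_STRAY if the interrupt is not ours,
 * otherwise mask the device, push the real work onto a taskqueue and
 * return FILTER_HANDLED.  A minimal sketch of that pattern follows
 * (hypothetical, compiled out):
 */
#if 0
static int
example_fast_filter(void *arg)
{
        struct adapter  *sc = arg;
        u32             icr = E1000_READ_REG(&sc->hw, E1000_ICR);

        if (icr == 0 || icr == 0xffffffff)      /* not ours, or hot eject */
                return (FILTER_STRAY);
        igb_disable_intr(sc);                   /* mask until the task runs */
        taskqueue_enqueue(sc->queues->tq, &sc->queues->que_task);
        return (FILTER_HANDLED);
}
#endif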
1499
1500 #ifdef DEVICE_POLLING
1501 #if __FreeBSD_version >= 800000
1502 #define POLL_RETURN_COUNT(a) (a)
1503 static int
1504 #else
1505 #define POLL_RETURN_COUNT(a)
1506 static void
1507 #endif
1508 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1509 {
1510         struct adapter          *adapter = ifp->if_softc;
1511         struct igb_queue        *que;
1512         struct tx_ring          *txr;
1513         u32                     reg_icr, rx_done = 0;
1514         u32                     loop = IGB_MAX_LOOP;
1515         bool                    more;
1516
1517         IGB_CORE_LOCK(adapter);
1518         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1519                 IGB_CORE_UNLOCK(adapter);
1520                 return POLL_RETURN_COUNT(rx_done);
1521         }
1522
1523         if (cmd == POLL_AND_CHECK_STATUS) {
1524                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1525                 /* Link status change */
1526                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1527                         igb_handle_link_locked(adapter);
1528
1529                 if (reg_icr & E1000_ICR_RXO)
1530                         adapter->rx_overruns++;
1531         }
1532         IGB_CORE_UNLOCK(adapter);
1533
1534         for (int i = 0; i < adapter->num_queues; i++) {
1535                 que = &adapter->queues[i];
1536                 txr = que->txr;
1537
1538                 igb_rxeof(que, count, &rx_done);
1539
1540                 IGB_TX_LOCK(txr);
1541                 do {
1542                         more = igb_txeof(txr);
1543                 } while (loop-- && more);
1544 #if __FreeBSD_version >= 800000
1545                 if (!drbr_empty(ifp, txr->br))
1546                         igb_mq_start_locked(ifp, txr);
1547 #else
1548                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1549                         igb_start_locked(txr, ifp);
1550 #endif
1551                 IGB_TX_UNLOCK(txr);
1552         }
1553
1554         return POLL_RETURN_COUNT(rx_done);
1555 }
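
/*
 * Editor's note: the POLL_RETURN_COUNT() dance above exists because the
 * polling KPI changed at FreeBSD 8.0: poll handlers now return the
 * number of packets processed so the polling core can adapt its budget,
 * while older kernels declared them void.  The macro expands to its
 * argument on __FreeBSD_version >= 800000 and to nothing before that,
 * letting one function body serve both interfaces.
 */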
1556 #endif /* DEVICE_POLLING */
1557
1558 /*********************************************************************
1559  *
1560  *  MSIX Que Interrupt Service routine
1561  *
1562  **********************************************************************/
1563 static void
1564 igb_msix_que(void *arg)
1565 {
1566         struct igb_queue *que = arg;
1567         struct adapter *adapter = que->adapter;
1568         struct ifnet   *ifp = adapter->ifp;
1569         struct tx_ring *txr = que->txr;
1570         struct rx_ring *rxr = que->rxr;
1571         u32             newitr = 0;
1572         bool            more_rx;
1573
1574         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1575         ++que->irqs;
1576
1577         IGB_TX_LOCK(txr);
1578         igb_txeof(txr);
1579 #if __FreeBSD_version >= 800000
1580         /* Process the stack queue only if not depleted */
1581         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1582             !drbr_empty(ifp, txr->br))
1583                 igb_mq_start_locked(ifp, txr);
1584 #else
1585         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1586                 igb_start_locked(txr, ifp);
1587 #endif
1588         IGB_TX_UNLOCK(txr);
1589
1590         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1591
1592         if (adapter->enable_aim == FALSE)
1593                 goto no_calc;
1594         /*
1595         ** Do Adaptive Interrupt Moderation:
1596         **  - Write out the last calculated setting
1597         **  - Calculate a new one from the average packet
1598         **    size over the last interval (example below).
1599         */
1600         if (que->eitr_setting)
1601                 E1000_WRITE_REG(&adapter->hw,
1602                     E1000_EITR(que->msix), que->eitr_setting);
1603  
1604         que->eitr_setting = 0;
1605
1606         /* Idle, do nothing */
1607         if ((txr->bytes == 0) && (rxr->bytes == 0))
1608                 goto no_calc;
1609                                 
1610         /* Use half the default if link is sub-gigabit */
1611         if (adapter->link_speed != 1000)
1612                 newitr = IGB_DEFAULT_ITR / 2;
1613         else {
1614                 if ((txr->bytes) && (txr->packets))
1615                         newitr = txr->bytes/txr->packets;
1616                 if ((rxr->bytes) && (rxr->packets))
1617                         newitr = max(newitr,
1618                             (rxr->bytes / rxr->packets));
1619                 newitr += 24; /* account for hardware frame, crc */
1620                 /* set an upper boundary */
1621                 newitr = min(newitr, 3000);
1622                 /* Be nice to the mid range */
1623                 if ((newitr > 300) && (newitr < 1200))
1624                         newitr = (newitr / 3);
1625                 else
1626                         newitr = (newitr / 2);
1627         }
1628         newitr &= 0x7FFC;  /* Mask invalid bits */
1629         if (adapter->hw.mac.type == e1000_82575)
1630                 newitr |= newitr << 16;
1631         else
1632                 newitr |= E1000_EITR_CNT_IGNR;
1633                  
1634         /* save for next interrupt */
1635         que->eitr_setting = newitr;
1636
1637         /* Reset state */
1638         txr->bytes = 0;
1639         txr->packets = 0;
1640         rxr->bytes = 0;
1641         rxr->packets = 0;
1642
1643 no_calc:
1644         /* Schedule a clean task if needed */
1645         if (more_rx)
1646                 taskqueue_enqueue(que->tq, &que->que_task);
1647         else
1648                 /* Reenable this interrupt */
1649                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1650         return;
1651 }
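
/*
 * Editor's note: a worked example of the AIM computation above, at
 * gigabit speed.  With txr->bytes = 300000 and txr->packets = 400 the
 * average is 750 bytes/packet; adding 24 gives 774, which lands in the
 * (300, 1200) mid range and is divided by 3 to 258; masking with 0x7FFC
 * then yields 256.  Below, a pure-function sketch of the same logic for
 * a single bytes/packets sample (hypothetical, compiled out; the real
 * code takes the max of the TX and RX averages):
 */
#if 0
static u32
igb_aim_newitr(u32 bytes, u32 packets, u16 link_speed, bool is_82575)
{
        u32 newitr = 0;

        if (link_speed != 1000)
                newitr = IGB_DEFAULT_ITR / 2;   /* halve if sub-gigabit */
        else {
                if (bytes && packets)
                        newitr = bytes / packets;
                newitr += 24;                   /* framing + CRC overhead */
                newitr = min(newitr, 3000);     /* upper bound */
                if (newitr > 300 && newitr < 1200)
                        newitr /= 3;            /* be nice to the mid range */
                else
                        newitr /= 2;
        }
        newitr &= 0x7FFC;                       /* mask invalid bits */
        if (is_82575)
                newitr |= newitr << 16;
        else
                newitr |= E1000_EITR_CNT_IGNR;
        return (newitr);
}
#endif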
1652
1653
1654 /*********************************************************************
1655  *
1656  *  MSIX Link Interrupt Service routine
1657  *
1658  **********************************************************************/
1659
1660 static void
1661 igb_msix_link(void *arg)
1662 {
1663         struct adapter  *adapter = arg;
1664         u32             icr;
1665
1666         ++adapter->link_irq;
1667         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1668         if (!(icr & E1000_ICR_LSC))
1669                 goto spurious;
1670         igb_handle_link(adapter, 0);
1671
1672 spurious:
1673         /* Rearm */
1674         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1675         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1676         return;
1677 }
1678
1679
1680 /*********************************************************************
1681  *
1682  *  Media Ioctl callback
1683  *
1684  *  This routine is called whenever the user queries the status of
1685  *  the interface using ifconfig.
1686  *
1687  **********************************************************************/
1688 static void
1689 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1690 {
1691         struct adapter *adapter = ifp->if_softc;
1692
1693         INIT_DEBUGOUT("igb_media_status: begin");
1694
1695         IGB_CORE_LOCK(adapter);
1696         igb_update_link_status(adapter);
1697
1698         ifmr->ifm_status = IFM_AVALID;
1699         ifmr->ifm_active = IFM_ETHER;
1700
1701         if (!adapter->link_active) {
1702                 IGB_CORE_UNLOCK(adapter);
1703                 return;
1704         }
1705
1706         ifmr->ifm_status |= IFM_ACTIVE;
1707
1708         switch (adapter->link_speed) {
1709         case 10:
1710                 ifmr->ifm_active |= IFM_10_T;
1711                 break;
1712         case 100:
1713                 /*
1714                 ** Support for 100Mb SFP - these are Fiber 
1715                 ** but the media type appears as serdes
1716                 */
1717                 if (adapter->hw.phy.media_type ==
1718                     e1000_media_type_internal_serdes)
1719                         ifmr->ifm_active |= IFM_100_FX;
1720                 else
1721                         ifmr->ifm_active |= IFM_100_TX;
1722                 break;
1723         case 1000:
1724                 ifmr->ifm_active |= IFM_1000_T;
1725                 break;
1726         }
1727
1728         if (adapter->link_duplex == FULL_DUPLEX)
1729                 ifmr->ifm_active |= IFM_FDX;
1730         else
1731                 ifmr->ifm_active |= IFM_HDX;
1732
1733         IGB_CORE_UNLOCK(adapter);
1734 }
1735
1736 /*********************************************************************
1737  *
1738  *  Media Ioctl callback
1739  *
1740  *  This routine is called when the user changes speed/duplex using
1741  *  the media/mediaopt options with ifconfig.
1742  *
1743  **********************************************************************/
1744 static int
1745 igb_media_change(struct ifnet *ifp)
1746 {
1747         struct adapter *adapter = ifp->if_softc;
1748         struct ifmedia  *ifm = &adapter->media;
1749
1750         INIT_DEBUGOUT("igb_media_change: begin");
1751
1752         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1753                 return (EINVAL);
1754
1755         IGB_CORE_LOCK(adapter);
1756         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1757         case IFM_AUTO:
1758                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1760                 break;
1761         case IFM_1000_LX:
1762         case IFM_1000_SX:
1763         case IFM_1000_T:
1764                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1765                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1766                 break;
1767         case IFM_100_TX:
1768                 adapter->hw.mac.autoneg = FALSE;
1769                 adapter->hw.phy.autoneg_advertised = 0;
1770                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1772                 else
1773                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1774                 break;
1775         case IFM_10_T:
1776                 adapter->hw.mac.autoneg = FALSE;
1777                 adapter->hw.phy.autoneg_advertised = 0;
1778                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1780                 else
1781                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1782                 break;
1783         default:
1784                 device_printf(adapter->dev, "Unsupported media type\n");
1785         }
1786
1787         igb_init_locked(adapter);
1788         IGB_CORE_UNLOCK(adapter);
1789
1790         return (0);
1791 }
1792
1793
1794 /*********************************************************************
1795  *
1796  *  This routine maps the mbufs to Advanced TX descriptors.
1797  *  
1798  **********************************************************************/
1799 static int
1800 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1801 {
1802         struct adapter          *adapter = txr->adapter;
1803         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1804         bus_dmamap_t            map;
1805         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1806         union e1000_adv_tx_desc *txd = NULL;
1807         struct mbuf             *m_head = *m_headp;
1808         struct ether_vlan_header *eh = NULL;
1809         struct ip               *ip = NULL;
1810         struct tcphdr           *th = NULL;
1811         u32                     hdrlen, cmd_type_len, olinfo_status = 0;
1812         int                     ehdrlen, poff;
1813         int                     nsegs, i, first, last = 0;
1814         int                     error, do_tso, remap = 1;
1815
1816         /* Set basic descriptor constants */
1817         cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1818         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1819         if (m_head->m_flags & M_VLANTAG)
1820                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1821
1822 retry:
1823         m_head = *m_headp;
1824         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1825         hdrlen = ehdrlen = poff = 0;
1826
1827         /*
1828          * Intel recommends entire IP/TCP header length reside in a single
1829          * buffer. If multiple descriptors are used to describe the IP and
1830          * TCP header, each descriptor should describe one or more
1831          * complete headers; descriptors referencing only parts of headers
1832          * are not supported. If all layer headers are not coalesced into
1833          * a single buffer, each buffer should not cross a 4KB boundary,
1834          * or be larger than the maximum read request size.
1835          * The controller also requires the IP/TCP header to be modified
1836          * for TSO to work, so we first get a writable mbuf chain, then
1837          * coalesce the ethernet/IP/TCP headers into a single buffer to
1838          * meet the controller's requirement.  This also simplifies
1839          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1840          */
1841         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1842                 if (do_tso || (m_head->m_next != NULL && 
1843                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1844                         if (M_WRITABLE(*m_headp) == 0) {
1845                                 m_head = m_dup(*m_headp, M_NOWAIT);
1846                                 m_freem(*m_headp);
1847                                 if (m_head == NULL) {
1848                                         *m_headp = NULL;
1849                                         return (ENOBUFS);
1850                                 }
1851                                 *m_headp = m_head;
1852                         }
1853                 }
1854                 /*
1855                  * Assume IPv4; we don't have TSO/checksum offload support
1856                  * for IPv6 yet.
1857                  */
1858                 ehdrlen = sizeof(struct ether_header);
1859                 m_head = m_pullup(m_head, ehdrlen);
1860                 if (m_head == NULL) {
1861                         *m_headp = NULL;
1862                         return (ENOBUFS);
1863                 }
1864                 eh = mtod(m_head, struct ether_vlan_header *);
1865                 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1866                         ehdrlen = sizeof(struct ether_vlan_header);
1867                         m_head = m_pullup(m_head, ehdrlen);
1868                         if (m_head == NULL) {
1869                                 *m_headp = NULL;
1870                                 return (ENOBUFS);
1871                         }
1872                 }
1873                 m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1874                 if (m_head == NULL) {
1875                         *m_headp = NULL;
1876                         return (ENOBUFS);
1877                 }
1878                 ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1879                 poff = ehdrlen + (ip->ip_hl << 2);
1880                 if (do_tso) {
1881                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1882                         if (m_head == NULL) {
1883                                 *m_headp = NULL;
1884                                 return (ENOBUFS);
1885                         }
1886                         /*
1887                          * The TCP pseudo checksum must not include the TCP
1888                          * payload length, so the driver recomputes it here in
1889                          * the form the hardware expects to see, as required by
1890                          * Microsoft's Large Send specification.
1891                          */
1892                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
1894                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1895                         /* Keep track of the full header length */
1896                         hdrlen = poff + (th->th_off << 2);
1897                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1898                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1899                         if (m_head == NULL) {
1900                                 *m_headp = NULL;
1901                                 return (ENOBUFS);
1902                         }
1903                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904                         m_head = m_pullup(m_head, poff + (th->th_off << 2));
1905                         if (m_head == NULL) {
1906                                 *m_headp = NULL;
1907                                 return (ENOBUFS);
1908                         }
1909                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1910                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1911                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1912                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1913                         if (m_head == NULL) {
1914                                 *m_headp = NULL;
1915                                 return (ENOBUFS);
1916                         }
1917                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1918                 }
1919                 *m_headp = m_head;
1920         }
1921
1922         /*
1923          * Map the packet for DMA
1924          *
1925          * Capture the first descriptor index,
1926          * this descriptor will have the index
1927          * of the EOP which is the only one that
1928          * now gets a DONE bit writeback.
1929          */
1930         first = txr->next_avail_desc;
1931         tx_buffer = &txr->tx_buffers[first];
1932         tx_buffer_mapped = tx_buffer;
1933         map = tx_buffer->map;
1934
1935         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1936             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1937
1938         /*
1939          * There are two types of errors we can (try) to handle:
1940          * - EFBIG means the mbuf chain was too long and bus_dma ran
1941          *   out of segments.  Defragment the mbuf chain and try again.
1942          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1943          *   at this point in time.  Defer sending and try again later.
1944          * All other errors, in particular EINVAL, are fatal and prevent the
1945          * mbuf chain from ever going through.  Drop it and report error.
1946          */
1947         if (error == EFBIG && remap) {
1948                 struct mbuf *m;
1949
1950                 m = m_defrag(*m_headp, M_NOWAIT);
1951                 if (m == NULL) {
1952                         adapter->mbuf_defrag_failed++;
1953                         m_freem(*m_headp);
1954                         *m_headp = NULL;
1955                         return (ENOBUFS);
1956                 }
1957                 *m_headp = m;
1958
1959                 /* Try it again, but only once */
1960                 remap = 0;
1961                 goto retry;
1962         } else if (error == ENOMEM) {
1963                 adapter->no_tx_dma_setup++;
1964                 return (error);
1965         } else if (error != 0) {
1966                 adapter->no_tx_dma_setup++;
1967                 m_freem(*m_headp);
1968                 *m_headp = NULL;
1969                 return (error);
1970         }
1971
1972         /*
1973         ** Make sure we don't overrun the ring;
1974         ** we need nsegs descriptors plus one for
1975         ** the context descriptor used for
1976         ** offloads.
1977         */
1978         if ((nsegs + 1) > (txr->tx_avail - 2)) {
1979                 txr->no_desc_avail++;
1980                 bus_dmamap_unload(txr->txtag, map);
1981                 return (ENOBUFS);
1982         }
1983         m_head = *m_headp;
1984
1985         /* Do hardware assists:
1986          * Set up the context descriptor, used
1987          * when any hardware offload is done.
1988          * This includes CSUM, VLAN, and TSO.
1989          * It will use the first descriptor.
1990          */
1991
1992         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1993                 if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1994                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1995                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1996                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1997                 } else
1998                         return (ENXIO);
1999         } else if (igb_tx_ctx_setup(txr, m_head))
2000                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2001
2002         /* Calculate payload length */
2003         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2004             << E1000_ADVTXD_PAYLEN_SHIFT);
2005
2006         /* 82575 needs the queue index added */
2007         if (adapter->hw.mac.type == e1000_82575)
2008                 olinfo_status |= txr->me << 4;
2009
2010         /* Set up our transmit descriptors */
2011         i = txr->next_avail_desc;
2012         for (int j = 0; j < nsegs; j++) {
2013                 bus_size_t seg_len;
2014                 bus_addr_t seg_addr;
2015
2016                 tx_buffer = &txr->tx_buffers[i];
2017                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2018                 seg_addr = segs[j].ds_addr;
2019                 seg_len  = segs[j].ds_len;
2020
2021                 txd->read.buffer_addr = htole64(seg_addr);
2022                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2023                 txd->read.olinfo_status = htole32(olinfo_status);
2024                 last = i;
2025                 if (++i == adapter->num_tx_desc)
2026                         i = 0;
2027                 tx_buffer->m_head = NULL;
2028                 tx_buffer->next_eop = -1;
2029         }
2030
2031         txr->next_avail_desc = i;
2032         txr->tx_avail -= nsegs;
2033         tx_buffer->m_head = m_head;
2034
2035         /*
2036         ** Here we swap the map so the last descriptor,
2037         ** which gets the completion interrupt, has the
2038         ** real map, and the first descriptor gets the
2039         ** unused map from this descriptor.
2040         */
2041         tx_buffer_mapped->map = tx_buffer->map;
2042         tx_buffer->map = map;
2043         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2044
2045         /*
2046          * Last Descriptor of Packet
2047          * needs End Of Packet (EOP)
2048          * and Report Status (RS)
2049          */
2050         txd->read.cmd_type_len |=
2051             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2052         /*
2053          * Keep track in the first buffer which
2054          * descriptor will be written back
2055          */
2056         tx_buffer = &txr->tx_buffers[first];
2057         tx_buffer->next_eop = last;
2058         /* Update the watchdog time early and often */
2059         txr->watchdog_time = ticks;
2060
2061         /*
2062          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2063          * that this frame is available to transmit.
2064          */
2065         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2066             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2067         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2068         ++txr->tx_packets;
2069
2070         return (0);
2071 }
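
/*
 * Editor's note: a minimal sketch of the ring-index arithmetic used in
 * igb_xmit() above (hypothetical, compiled out).  next_avail_desc walks
 * the descriptor ring modulo num_tx_desc, tx_avail is debited by the
 * number of segments consumed, and the ring is treated as full when a
 * request plus its context descriptor no longer fits:
 */
#if 0
static inline int
example_ring_advance(int i, int ring_size)
{
        /* Wrap to slot 0 after the last descriptor. */
        return ((i + 1 == ring_size) ? 0 : i + 1);
}
#endif
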
2072 static void
2073 igb_set_promisc(struct adapter *adapter)
2074 {
2075         struct ifnet    *ifp = adapter->ifp;
2076         struct e1000_hw *hw = &adapter->hw;
2077         u32             reg;
2078
2079         if (adapter->vf_ifp) {
2080                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2081                 return;
2082         }
2083
2084         reg = E1000_READ_REG(hw, E1000_RCTL);
2085         if (ifp->if_flags & IFF_PROMISC) {
2086                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2087                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2088         } else if (ifp->if_flags & IFF_ALLMULTI) {
2089                 reg |= E1000_RCTL_MPE;
2090                 reg &= ~E1000_RCTL_UPE;
2091                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2092         }
2093 }
2094
2095 static void
2096 igb_disable_promisc(struct adapter *adapter)
2097 {
2098         struct e1000_hw *hw = &adapter->hw;
2099         u32             reg;
2100
2101         if (adapter->vf_ifp) {
2102                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2103                 return;
2104         }
2105         reg = E1000_READ_REG(hw, E1000_RCTL);
2106         reg &= ~E1000_RCTL_UPE;
2107         reg &= ~E1000_RCTL_MPE;
2108         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2109 }
2110
2111
2112 /*********************************************************************
2113  *  Multicast Update
2114  *
2115  *  This routine is called whenever the multicast address list is updated.
2116  *
2117  **********************************************************************/
2118
2119 static void
2120 igb_set_multi(struct adapter *adapter)
2121 {
2122         struct ifnet    *ifp = adapter->ifp;
2123         struct ifmultiaddr *ifma;
2124         u32 reg_rctl = 0;
2125         u8  *mta;
2126
2127         int mcnt = 0;
2128
2129         IOCTL_DEBUGOUT("igb_set_multi: begin");
2130
2131         mta = adapter->mta;
2132         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2133             MAX_NUM_MULTICAST_ADDRESSES);
2134
2135 #if __FreeBSD_version < 800000
2136         IF_ADDR_LOCK(ifp);
2137 #else
2138         if_maddr_rlock(ifp);
2139 #endif
2140         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2141                 if (ifma->ifma_addr->sa_family != AF_LINK)
2142                         continue;
2143
2144                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2145                         break;
2146
2147                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2148                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2149                 mcnt++;
2150         }
2151 #if __FreeBSD_version < 800000
2152         IF_ADDR_UNLOCK(ifp);
2153 #else
2154         if_maddr_runlock(ifp);
2155 #endif
2156
2157         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2158                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2159                 reg_rctl |= E1000_RCTL_MPE;
2160                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2161         } else
2162                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2163 }
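
/*
 * Editor's note: adapter->mta is a flat byte array that holds up to
 * MAX_NUM_MULTICAST_ADDRESSES link-layer addresses back to back, so
 * entry k begins at offset k * ETH_ADDR_LEN.  A hypothetical accessor
 * (compiled out) illustrating the layout:
 */
#if 0
static inline u8 *
example_mta_entry(u8 *mta, int k)
{
        return (&mta[k * ETH_ADDR_LEN]);
}
#endif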
2164
2165
2166 /*********************************************************************
2167  *  Timer routine:
2168  *      This routine checks for link status,
2169  *      updates statistics, and does the watchdog.
2170  *
2171  **********************************************************************/
2172
2173 static void
2174 igb_local_timer(void *arg)
2175 {
2176         struct adapter          *adapter = arg;
2177         device_t                dev = adapter->dev;
2178         struct ifnet            *ifp = adapter->ifp;
2179         struct tx_ring          *txr = adapter->tx_rings;
2180         struct igb_queue        *que = adapter->queues;
2181         int                     hung = 0, busy = 0;
2182
2183
2184         IGB_CORE_LOCK_ASSERT(adapter);
2185
2186         igb_update_link_status(adapter);
2187         igb_update_stats_counters(adapter);
2188
2189         /*
2190         ** Check the status of the TX queues:
2191         **      - centralized, locked handling of OACTIVE
2192         **      - fire the watchdog only if all queues show as hung
2193         */
2194         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2195                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2196                     (adapter->pause_frames == 0))
2197                         ++hung;
2198                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2199                         ++busy;
2200                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2201                         taskqueue_enqueue(que->tq, &que->que_task);
2202         }
2203         if (hung == adapter->num_queues)
2204                 goto timeout;
2205         if (busy == adapter->num_queues)
2206                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2207         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2208             (busy < adapter->num_queues))
2209                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2210
2211         adapter->pause_frames = 0;
2212         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2213 #ifndef DEVICE_POLLING
2214         /* Schedule all queue interrupts - deadlock protection */
2215         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2216 #endif
2217         return;
2218
2219 timeout:
2220         device_printf(dev, "Watchdog timeout -- resetting\n");
2221         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2222             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2223             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2224         device_printf(dev, "TX(%d) desc avail = %d, "
2225             "Next TX to Clean = %d\n",
2226             txr->me, txr->tx_avail, txr->next_to_clean);
2227         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2228         adapter->watchdog_events++;
2229         igb_init_locked(adapter);
2230 }
2231
2232 static void
2233 igb_update_link_status(struct adapter *adapter)
2234 {
2235         struct e1000_hw         *hw = &adapter->hw;
2236         struct e1000_fc_info    *fc = &hw->fc;
2237         struct ifnet            *ifp = adapter->ifp;
2238         device_t                dev = adapter->dev;
2239         struct tx_ring          *txr = adapter->tx_rings;
2240         u32                     link_check, thstat, ctrl;
2241         char                    *flowctl = NULL;
2242
2243         link_check = thstat = ctrl = 0;
2244
2245         /* Get the cached link value or read for real */
2246         switch (hw->phy.media_type) {
2247         case e1000_media_type_copper:
2248                 if (hw->mac.get_link_status) {
2249                         /* Do the work to read phy */
2250                         e1000_check_for_link(hw);
2251                         link_check = !hw->mac.get_link_status;
2252                 } else
2253                         link_check = TRUE;
2254                 break;
2255         case e1000_media_type_fiber:
2256                 e1000_check_for_link(hw);
2257                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2258                                  E1000_STATUS_LU);
2259                 break;
2260         case e1000_media_type_internal_serdes:
2261                 e1000_check_for_link(hw);
2262                 link_check = adapter->hw.mac.serdes_has_link;
2263                 break;
2264         /* VF device is type_unknown */
2265         case e1000_media_type_unknown:
2266                 e1000_check_for_link(hw);
2267                 link_check = !hw->mac.get_link_status;
2268                 /* Fall thru */
2269         default:
2270                 break;
2271         }
2272
2273         /* Check for thermal downshift or shutdown */
2274         if (hw->mac.type == e1000_i350) {
2275                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2276                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2277         }
2278
2279         /* Get the flow control for display */
2280         switch (fc->current_mode) {
2281         case e1000_fc_rx_pause:
2282                 flowctl = "RX";
2283                 break;  
2284         case e1000_fc_tx_pause:
2285                 flowctl = "TX";
2286                 break;  
2287         case e1000_fc_full:
2288                 flowctl = "Full";
2289                 break;  
2290         case e1000_fc_none:
2291         default:
2292                 flowctl = "None";
2293                 break;  
2294         }
2295
2296         /* Now we check if a transition has happened */
2297         if (link_check && (adapter->link_active == 0)) {
2298                 e1000_get_speed_and_duplex(&adapter->hw, 
2299                     &adapter->link_speed, &adapter->link_duplex);
2300                 if (bootverbose)
2301                         device_printf(dev, "Link is up %d Mbps %s,"
2302                             " Flow Control: %s\n",
2303                             adapter->link_speed,
2304                             ((adapter->link_duplex == FULL_DUPLEX) ?
2305                             "Full Duplex" : "Half Duplex"), flowctl);
2306                 adapter->link_active = 1;
2307                 ifp->if_baudrate = adapter->link_speed * 1000000;
2308                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2309                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2310                         device_printf(dev, "Link: thermal downshift\n");
2311                 /* This can sleep */
2312                 if_link_state_change(ifp, LINK_STATE_UP);
2313         } else if (!link_check && (adapter->link_active == 1)) {
2314                 ifp->if_baudrate = adapter->link_speed = 0;
2315                 adapter->link_duplex = 0;
2316                 if (bootverbose)
2317                         device_printf(dev, "Link is Down\n");
2318                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2319                     (thstat & E1000_THSTAT_PWR_DOWN))
2320                         device_printf(dev, "Link: thermal shutdown\n");
2321                 adapter->link_active = 0;
2322                 /* This can sleep */
2323                 if_link_state_change(ifp, LINK_STATE_DOWN);
2324                 /* Reset queue state */
2325                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2326                         txr->queue_status = IGB_QUEUE_IDLE;
2327         }
2328 }
2329
2330 /*********************************************************************
2331  *
2332  *  This routine disables all traffic on the adapter by issuing a
2333  *  global reset on the MAC and deallocating TX/RX buffers.
2334  *
2335  **********************************************************************/
2336
2337 static void
2338 igb_stop(void *arg)
2339 {
2340         struct adapter  *adapter = arg;
2341         struct ifnet    *ifp = adapter->ifp;
2342         struct tx_ring *txr = adapter->tx_rings;
2343
2344         IGB_CORE_LOCK_ASSERT(adapter);
2345
2346         INIT_DEBUGOUT("igb_stop: begin");
2347
2348         igb_disable_intr(adapter);
2349
2350         callout_stop(&adapter->timer);
2351
2352         /* Tell the stack that the interface is no longer active */
2353         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2354         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2355
2356         /* Disarm watchdog timer. */
2357         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2358                 IGB_TX_LOCK(txr);
2359                 txr->queue_status = IGB_QUEUE_IDLE;
2360                 IGB_TX_UNLOCK(txr);
2361         }
2362
2363         e1000_reset_hw(&adapter->hw);
2364         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2365
2366         e1000_led_off(&adapter->hw);
2367         e1000_cleanup_led(&adapter->hw);
2368 }
2369
2370
2371 /*********************************************************************
2372  *
2373  *  Determine hardware revision.
2374  *
2375  **********************************************************************/
2376 static void
2377 igb_identify_hardware(struct adapter *adapter)
2378 {
2379         device_t dev = adapter->dev;
2380
2381         /* Make sure our PCI config space has the necessary stuff set */
2382         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2383         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2384             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2385                 INIT_DEBUGOUT("Memory Access and/or Bus Master "
2386                     "bits were not set!\n");
2387                 adapter->hw.bus.pci_cmd_word |=
2388                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2389                 pci_write_config(dev, PCIR_COMMAND,
2390                     adapter->hw.bus.pci_cmd_word, 2);
2391         }
2392
2393         /* Save off the information about this board */
2394         adapter->hw.vendor_id = pci_get_vendor(dev);
2395         adapter->hw.device_id = pci_get_device(dev);
2396         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2397         adapter->hw.subsystem_vendor_id =
2398             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2399         adapter->hw.subsystem_device_id =
2400             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2401
2402         /* Set MAC type early for PCI setup */
2403         e1000_set_mac_type(&adapter->hw);
2404
2405         /* Are we a VF device? */
2406         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2407             (adapter->hw.mac.type == e1000_vfadapt_i350))
2408                 adapter->vf_ifp = 1;
2409         else
2410                 adapter->vf_ifp = 0;
2411 }
2412
2413 static int
2414 igb_allocate_pci_resources(struct adapter *adapter)
2415 {
2416         device_t        dev = adapter->dev;
2417         int             rid;
2418
2419         rid = PCIR_BAR(0);
2420         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2421             &rid, RF_ACTIVE);
2422         if (adapter->pci_mem == NULL) {
2423                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2424                 return (ENXIO);
2425         }
2426         adapter->osdep.mem_bus_space_tag =
2427             rman_get_bustag(adapter->pci_mem);
2428         adapter->osdep.mem_bus_space_handle =
2429             rman_get_bushandle(adapter->pci_mem);
2430         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2431
2432         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2433
2434         /* This will setup either MSI/X or MSI */
2435         adapter->msix = igb_setup_msix(adapter);
2436         adapter->hw.back = &adapter->osdep;
2437
2438         return (0);
2439 }
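
/*
 * Editor's note: hw.hw_addr above is not used as a directly mapped
 * virtual address; it is pointed at the stored bus-space handle, and
 * the shared-code E1000_READ_REG()/E1000_WRITE_REG() accessors reach
 * the registers through the bus-space tag and handle saved in osdep.
 */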
2440
2441 /*********************************************************************
2442  *
2443  *  Setup the Legacy or MSI Interrupt handler
2444  *
2445  **********************************************************************/
2446 static int
2447 igb_allocate_legacy(struct adapter *adapter)
2448 {
2449         device_t                dev = adapter->dev;
2450         struct igb_queue        *que = adapter->queues;
2451         struct tx_ring          *txr = adapter->tx_rings;
2452         int                     error, rid = 0;
2453
2454         /* Turn off all interrupts */
2455         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2456
2457         /* MSI RID is 1 */
2458         if (adapter->msix == 1)
2459                 rid = 1;
2460
2461         /* We allocate a single interrupt resource */
2462         adapter->res = bus_alloc_resource_any(dev,
2463             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2464         if (adapter->res == NULL) {
2465                 device_printf(dev, "Unable to allocate bus resource: "
2466                     "interrupt\n");
2467                 return (ENXIO);
2468         }
2469
2470 #if __FreeBSD_version >= 800000
2471         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2472 #endif
2473
2474         /*
2475          * Try allocating a fast interrupt and the associated deferred
2476          * processing contexts.
2477          */
2478         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2479         /* Make a task for deferred link handling */
2480         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2481         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2482             taskqueue_thread_enqueue, &que->tq);
2483         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2484             device_get_nameunit(adapter->dev));
2485         if ((error = bus_setup_intr(dev, adapter->res,
2486             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2487             adapter, &adapter->tag)) != 0) {
2488                 device_printf(dev, "Failed to register fast interrupt "
2489                             "handler: %d\n", error);
2490                 taskqueue_free(que->tq);
2491                 que->tq = NULL;
2492                 return (error);
2493         }
2494
2495         return (0);
2496 }
2497
2498
2499 /*********************************************************************
2500  *
2501  *  Setup the MSIX Queue Interrupt handlers: 
2502  *
2503  **********************************************************************/
2504 static int
2505 igb_allocate_msix(struct adapter *adapter)
2506 {
2507         device_t                dev = adapter->dev;
2508         struct igb_queue        *que = adapter->queues;
2509         int                     error, rid, vector = 0;
2510
2511         /* Be sure to start with all interrupts disabled */
2512         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2513         E1000_WRITE_FLUSH(&adapter->hw);
2514
2515         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2516                 rid = vector + 1;
2517                 que->res = bus_alloc_resource_any(dev,
2518                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2519                 if (que->res == NULL) {
2520                         device_printf(dev,
2521                             "Unable to allocate bus resource: "
2522                             "MSIX Queue Interrupt\n");
2523                         return (ENXIO);
2524                 }
2525                 error = bus_setup_intr(dev, que->res,
2526                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2527                     igb_msix_que, que, &que->tag);
2528                 if (error) {
2529                         que->res = NULL;
2530                         device_printf(dev, "Failed to register Queue handler\n");
2531                         return (error);
2532                 }
2533 #if __FreeBSD_version >= 800504
2534                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2535 #endif
2536                 que->msix = vector;
2537                 if (adapter->hw.mac.type == e1000_82575)
2538                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2539                 else
2540                         que->eims = 1 << vector;
2541                 /*
2542                 ** Bind the msix vector, and thus the
2543                 ** rings to the corresponding cpu.
2544                 */
2545                 if (adapter->num_queues > 1) {
2546                         if (igb_last_bind_cpu < 0)
2547                                 igb_last_bind_cpu = CPU_FIRST();
2548                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2549                         device_printf(dev,
2550                                 "Bound queue %d to cpu %d\n",
2551                                 i, igb_last_bind_cpu);
2552                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2553                 }
2554 #if __FreeBSD_version >= 800000
2555                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2556                     que->txr);
2557 #endif
2558                 /* Make a task for deferred handling */
2559                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2560                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2561                     taskqueue_thread_enqueue, &que->tq);
2562                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2563                     device_get_nameunit(adapter->dev));
2564         }
2565
2566         /* And Link */
2567         rid = vector + 1;
2568         adapter->res = bus_alloc_resource_any(dev,
2569             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2570         if (adapter->res == NULL) {
2571                 device_printf(dev,
2572                     "Unable to allocate bus resource: "
2573                     "MSIX Link Interrupt\n");
2574                 return (ENXIO);
2575         }
2576         if ((error = bus_setup_intr(dev, adapter->res,
2577             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2578             igb_msix_link, adapter, &adapter->tag)) != 0) {
2579                 device_printf(dev, "Failed to register Link handler\n");
2580                 return (error);
2581         }
2582 #if __FreeBSD_version >= 800504
2583         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2584 #endif
2585         adapter->linkvec = vector;
2586
2587         return (0);
2588 }
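
/*
 * Editor's note: the vector layout established above is one MSI-X
 * vector per queue (vectors 0 .. num_queues - 1) followed by a single
 * vector for link events (adapter->linkvec).  The SYS_RES_IRQ resource
 * ID for each vector is the vector number plus one, since MSI-X rids
 * are 1-based (rid 0 denotes the legacy INTx interrupt).
 */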
2589
2590
2591 static void
2592 igb_configure_queues(struct adapter *adapter)
2593 {
2594         struct  e1000_hw        *hw = &adapter->hw;
2595         struct  igb_queue       *que;
2596         u32                     tmp, ivar = 0, newitr = 0;
2597
2598         /* First turn on RSS capability */
2599         if (adapter->hw.mac.type != e1000_82575)
2600                 E1000_WRITE_REG(hw, E1000_GPIE,
2601                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2602                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2603
2604         /* Turn on MSIX */
2605         switch (adapter->hw.mac.type) {
2606         case e1000_82580:
2607         case e1000_i350:
2608         case e1000_i210:
2609         case e1000_i211:
2610         case e1000_vfadapt:
2611         case e1000_vfadapt_i350:
2612                 /* RX entries */
2613                 for (int i = 0; i < adapter->num_queues; i++) {
2614                         u32 index = i >> 1;
2615                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2616                         que = &adapter->queues[i];
2617                         if (i & 1) {
2618                                 ivar &= 0xFF00FFFF;
2619                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2620                         } else {
2621                                 ivar &= 0xFFFFFF00;
2622                                 ivar |= que->msix | E1000_IVAR_VALID;
2623                         }
2624                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2625                 }
2626                 /* TX entries */
2627                 for (int i = 0; i < adapter->num_queues; i++) {
2628                         u32 index = i >> 1;
2629                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2630                         que = &adapter->queues[i];
2631                         if (i & 1) {
2632                                 ivar &= 0x00FFFFFF;
2633                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2634                         } else {
2635                                 ivar &= 0xFFFF00FF;
2636                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2637                         }
2638                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2639                         adapter->que_mask |= que->eims;
2640                 }
2641
2642                 /* And for the link interrupt */
2643                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2644                 adapter->link_mask = 1 << adapter->linkvec;
2645                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2646                 break;
2647         case e1000_82576:
2648                 /* RX entries */
2649                 for (int i = 0; i < adapter->num_queues; i++) {
2650                         u32 index = i & 0x7; /* Each IVAR has two entries */
2651                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2652                         que = &adapter->queues[i];
2653                         if (i < 8) {
2654                                 ivar &= 0xFFFFFF00;
2655                                 ivar |= que->msix | E1000_IVAR_VALID;
2656                         } else {
2657                                 ivar &= 0xFF00FFFF;
2658                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2659                         }
2660                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2661                         adapter->que_mask |= que->eims;
2662                 }
2663                 /* TX entries */
2664                 for (int i = 0; i < adapter->num_queues; i++) {
2665                         u32 index = i & 0x7; /* Each IVAR has two entries */
2666                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2667                         que = &adapter->queues[i];
2668                         if (i < 8) {
2669                                 ivar &= 0xFFFF00FF;
2670                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2671                         } else {
2672                                 ivar &= 0x00FFFFFF;
2673                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2674                         }
2675                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2676                         adapter->que_mask |= que->eims;
2677                 }
2678
2679                 /* And for the link interrupt */
2680                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2681                 adapter->link_mask = 1 << adapter->linkvec;
2682                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2683                 break;
2684
2685         case e1000_82575:
2686                 /* Enable MSI-X support */
2687                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2688                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2689                 /* Auto-Mask interrupts upon ICR read. */
2690                 tmp |= E1000_CTRL_EXT_EIAME;
2691                 tmp |= E1000_CTRL_EXT_IRCA;
2692                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2693
2694                 /* Queues */
2695                 for (int i = 0; i < adapter->num_queues; i++) {
2696                         que = &adapter->queues[i];
2697                         tmp = E1000_EICR_RX_QUEUE0 << i;
2698                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2699                         que->eims = tmp;
2700                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2701                             i, que->eims);
2702                         adapter->que_mask |= que->eims;
2703                 }
2704
2705                 /* Link */
2706                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2707                     E1000_EIMS_OTHER);
2708                 adapter->link_mask |= E1000_EIMS_OTHER;
2709         default:
2710                 break;
2711         }
2712
2713         /* Set the starting interrupt rate */
2714         if (igb_max_interrupt_rate > 0)
2715                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2716
2717         if (hw->mac.type == e1000_82575)
2718                 newitr |= newitr << 16;
2719         else
2720                 newitr |= E1000_EITR_CNT_IGNR;
2721
2722         for (int i = 0; i < adapter->num_queues; i++) {
2723                 que = &adapter->queues[i];
2724                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2725         }
2726
2727         return;
2728 }
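
/*
 * Editor's note: on the 82580/i350-class parts each 32-bit IVAR
 * register programmed above carries the vectors for two queues, one
 * byte per entry: RX(2i) in bits 7:0, TX(2i) in bits 15:8, RX(2i+1) in
 * bits 23:16 and TX(2i+1) in bits 31:24, each tagged E1000_IVAR_VALID.
 * For the starting interrupt rate, newitr = (4000000 /
 * igb_max_interrupt_rate) & 0x7FFC; a cap of 8000 interrupts/sec, for
 * example, yields an EITR value of 500 (0x1F4).
 */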
2729
2730
2731 static void
2732 igb_free_pci_resources(struct adapter *adapter)
2733 {
2734         struct          igb_queue *que = adapter->queues;
2735         device_t        dev = adapter->dev;
2736         int             rid;
2737
2738         /*
2739         ** There is a slight possibility of a failure mode
2740         ** in attach that will result in entering this function
2741         ** before interrupt resources have been initialized, and
2742         ** in that case we do not want to execute the loops below.
2743         ** We can detect this reliably from the state of the adapter's
2744         ** res pointer.
2745         */
2746         if (adapter->res == NULL)
2747                 goto mem;
2748
2749         /*
2750          * First release all the interrupt resources:
2751          */
2752         for (int i = 0; i < adapter->num_queues; i++, que++) {
2753                 rid = que->msix + 1;
2754                 if (que->tag != NULL) {
2755                         bus_teardown_intr(dev, que->res, que->tag);
2756                         que->tag = NULL;
2757                 }
2758                 if (que->res != NULL)
2759                         bus_release_resource(dev,
2760                             SYS_RES_IRQ, rid, que->res);
2761         }
2762
2763         /* Clean the Legacy or Link interrupt last */
2764         if (adapter->linkvec) /* we are doing MSIX */
2765                 rid = adapter->linkvec + 1;
2766         else
2767                 rid = (adapter->msix != 0) ? 1 : 0;
2768
2769         que = adapter->queues;
2770         if (adapter->tag != NULL) {
2771                 taskqueue_drain(que->tq, &adapter->link_task);
2772                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2773                 adapter->tag = NULL;
2774         }
2775         if (adapter->res != NULL)
2776                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2777
2778         for (int i = 0; i < adapter->num_queues; i++, que++) {
2779                 if (que->tq != NULL) {
2780 #if __FreeBSD_version >= 800000
2781                         taskqueue_drain(que->tq, &que->txr->txq_task);
2782 #endif
2783                         taskqueue_drain(que->tq, &que->que_task);
2784                         taskqueue_free(que->tq);
2785                 }
2786         }
2787 mem:
2788         if (adapter->msix)
2789                 pci_release_msi(dev);
2790
2791         if (adapter->msix_mem != NULL)
2792                 bus_release_resource(dev, SYS_RES_MEMORY,
2793                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2794
2795         if (adapter->pci_mem != NULL)
2796                 bus_release_resource(dev, SYS_RES_MEMORY,
2797                     PCIR_BAR(0), adapter->pci_mem);
2798
2799 }
2800
2801 /*
2802  * Setup either MSI-X or MSI
2803  */
2804 static int
2805 igb_setup_msix(struct adapter *adapter)
2806 {
2807         device_t dev = adapter->dev;
2808         int rid, want, queues, msgs, maxqueues;
2809
2810         /* tuneable override */
2811         if (igb_enable_msix == 0)
2812                 goto msi;
2813
2814         /* First try MSI/X */
2815         rid = PCIR_BAR(IGB_MSIX_BAR);
2816         adapter->msix_mem = bus_alloc_resource_any(dev,
2817             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2818         if (!adapter->msix_mem) {
2819                 /* May not be enabled */
2820                 device_printf(adapter->dev,
2821                     "Unable to map MSIX table\n");
2822                 goto msi;
2823         }
2824
2825         msgs = pci_msix_count(dev); 
2826         if (msgs == 0) { /* system has msix disabled */
2827                 bus_release_resource(dev, SYS_RES_MEMORY,
2828                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2829                 adapter->msix_mem = NULL;
2830                 goto msi;
2831         }
2832
2833         /* Figure out a reasonable auto config value */
2834         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2835
2836         /* Manual override */
2837         if (igb_num_queues != 0)
2838                 queues = igb_num_queues;
2839
2840         /* Sanity check based on HW */
2841         switch (adapter->hw.mac.type) {
2842                 case e1000_82575:
2843                         maxqueues = 4;
2844                         break;
2845                 case e1000_82576:
2846                 case e1000_82580:
2847                 case e1000_i350:
2848                         maxqueues = 8;
2849                         break;
2850                 case e1000_i210:
2851                         maxqueues = 4;
2852                         break;
2853                 case e1000_i211:
2854                         maxqueues = 2;
2855                         break;
2856                 default:  /* VF interfaces */
2857                         maxqueues = 1;
2858                         break;
2859         }
2860         if (queues > maxqueues)
2861                 queues = maxqueues;
2862
2863         /*
2864         ** One vector (RX/TX pair) per queue
2865         ** plus an additional for Link interrupt
2866         */
2867         want = queues + 1;
2868         if (msgs >= want)
2869                 msgs = want;
2870         else {
2871                 device_printf(adapter->dev,
2872                     "MSIX Configuration Problem, "
2873                     "%d vectors configured, but %d vectors needed!\n",
2874                     msgs, want);
2875                 return (0);
2876         }
2877         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2878                 device_printf(adapter->dev,
2879                     "Using MSIX interrupts with %d vectors\n", msgs);
2880                 adapter->num_queues = queues;
2881                 return (msgs);
2882         }
2883 msi:
2884         msgs = pci_msi_count(dev);
2885         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2886                 device_printf(adapter->dev, "Using MSI interrupt\n");
2887                 return (msgs);
2888         }
2889         return (0);
2890 }
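/*
 * Illustrative sketch (not compiled into the driver; the name
 * igb_example_vectors is hypothetical): the vector accounting used
 * above, isolated.  One RX/TX vector pair is used per queue plus one
 * vector for the link interrupt, so an 82576 (8 queues max) on a
 * 4-core machine exposing 10 MSIX messages asks for
 * min(4, 10 - 1) + 1 = 5 vectors.
 */
#if 0
static int
igb_example_vectors(int msgs, int ncpus, int maxqueues)
{
	int queues;

	/* Auto-config: one queue per CPU, bounded by available messages. */
	queues = (ncpus > (msgs - 1)) ? (msgs - 1) : ncpus;
	/* Clamp to what the MAC supports. */
	if (queues > maxqueues)
		queues = maxqueues;
	/* One vector per queue pair, plus one for link. */
	return (queues + 1);
}
#endif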
2891
2892 /*********************************************************************
2893  *
2894  *  Set up a fresh starting state
2895  *
2896  **********************************************************************/
2897 static void
2898 igb_reset(struct adapter *adapter)
2899 {
2900         device_t        dev = adapter->dev;
2901         struct e1000_hw *hw = &adapter->hw;
2902         struct e1000_fc_info *fc = &hw->fc;
2903         struct ifnet    *ifp = adapter->ifp;
2904         u32             pba = 0;
2905         u16             hwm;
2906
2907         INIT_DEBUGOUT("igb_reset: begin");
2908
2909         /* Let the firmware know the OS is in control */
2910         igb_get_hw_control(adapter);
2911
2912         /*
2913          * Packet Buffer Allocation (PBA)
2914          * Writing PBA sets the receive portion of the buffer;
2915          * the remainder is used for the transmit buffer.
2916          */
2917         switch (hw->mac.type) {
2918         case e1000_82575:
2919                 pba = E1000_PBA_32K;
2920                 break;
2921         case e1000_82576:
2922         case e1000_vfadapt:
2923                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2924                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2925                 break;
2926         case e1000_82580:
2927         case e1000_i350:
2928         case e1000_vfadapt_i350:
2929                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2930                 pba = e1000_rxpbs_adjust_82580(pba);
2931                 break;
2932         case e1000_i210:
2933         case e1000_i211:
2934                 pba = E1000_PBA_34K;
                break;
2935         default:
2936                 break;
2937         }
2938
2939         /* Special needs in case of Jumbo frames */
2940         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2941                 u32 tx_space, min_tx, min_rx;
2942                 pba = E1000_READ_REG(hw, E1000_PBA);
2943                 tx_space = pba >> 16;
2944                 pba &= 0xffff;
2945                 min_tx = (adapter->max_frame_size +
2946                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2947                 min_tx = roundup2(min_tx, 1024);
2948                 min_tx >>= 10;
2949                 min_rx = adapter->max_frame_size;
2950                 min_rx = roundup2(min_rx, 1024);
2951                 min_rx >>= 10;
2952                 if (tx_space < min_tx &&
2953                     ((min_tx - tx_space) < pba)) {
2954                         pba = pba - (min_tx - tx_space);
2955                         /*
2956                          * if short on rx space, rx wins
2957                          * and must trump tx adjustment
2958                          */
2959                         if (pba < min_rx)
2960                                 pba = min_rx;
2961                 }
2962                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2963         }
2964
2965         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2966
2967         /*
2968          * These parameters control the automatic generation (Tx) and
2969          * response (Rx) to Ethernet PAUSE frames.
2970          * - High water mark should allow for at least two frames to be
2971          *   received after sending an XOFF.
2972          * - Low water mark works best when it is very near the high water mark.
2973          *   This allows the receiver to restart by sending XON when it has
2974          *   drained a bit.
2975          */
2976         hwm = min(((pba << 10) * 9 / 10),
2977             ((pba << 10) - 2 * adapter->max_frame_size));
2978
2979         if (hw->mac.type < e1000_82576) {
2980                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2981                 fc->low_water = fc->high_water - 8;
2982         } else {
2983                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2984                 fc->low_water = fc->high_water - 16;
2985         }
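        /*
         * Worked example (illustrative): with pba = 34 (KB) and a
         * 1518-byte max frame, hwm = min(34816 * 9 / 10,
         * 34816 - 2 * 1518) = min(31334, 31780) = 31334.  With the
         * 16-byte granularity that yields high_water = 31328 and
         * low_water = 31312.
         */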
2986
2987         fc->pause_time = IGB_FC_PAUSE_TIME;
2988         fc->send_xon = TRUE;
2989         if (adapter->fc)
2990                 fc->requested_mode = adapter->fc;
2991         else
2992                 fc->requested_mode = e1000_fc_default;
2993
2994         /* Issue a global reset */
2995         e1000_reset_hw(hw);
2996         E1000_WRITE_REG(hw, E1000_WUC, 0);
2997
2998         if (e1000_init_hw(hw) < 0)
2999                 device_printf(dev, "Hardware Initialization Failed\n");
3000
3001         /* Setup DMA Coalescing */
3002         if ((hw->mac.type > e1000_82580) &&
3003             (hw->mac.type != e1000_i211)) {
3004                 u32 dmac;
3005                 u32 reg = ~E1000_DMACR_DMAC_EN;
3006
3007                 if (adapter->dmac == 0) { /* Disabling it */
3008                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
3009                         goto reset_out;
3010                 }
3011
3012                 /* Set starting thresholds */
3013                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3014                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3015
3016                 hwm = 64 * pba - adapter->max_frame_size / 16;
3017                 if (hwm < 64 * (pba - 6))
3018                         hwm = 64 * (pba - 6);
3019                 reg = E1000_READ_REG(hw, E1000_FCRTC);
3020                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3021                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3022                     & E1000_FCRTC_RTH_COAL_MASK);
3023                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3024
3025
3026                 dmac = pba - adapter->max_frame_size / 512;
3027                 if (dmac < pba - 10)
3028                         dmac = pba - 10;
3029                 reg = E1000_READ_REG(hw, E1000_DMACR);
3030                 reg &= ~E1000_DMACR_DMACTHR_MASK;
3031                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3032                     & E1000_DMACR_DMACTHR_MASK);
3033                 /* transition to L0s or L1 if available */
3034                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3035                 /* timer = value in adapter->dmac in 32usec intervals */
3036                 reg |= (adapter->dmac >> 5);
3037                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3038
3039                 /* Set the interval before transition */
3040                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3041                 reg |= 0x80000004;
3042                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3043
3044                 /* free space in tx packet buffer to wake from DMA coal */
3045                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
3046                     (20480 - (2 * adapter->max_frame_size)) >> 6);
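                /*
                 * E.g. (illustrative): with a standard 1518-byte max
                 * frame, the wake threshold written above is
                 * (20480 - 2 * 1518) >> 6 = 17444 >> 6 = 272, i.e. it
                 * is expressed in 64-byte units of free TX
                 * packet-buffer space.
                 */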
3047
3048                 /* make low power state decision controlled by DMA coal */
3049                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3050                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3051                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3052                 device_printf(dev, "DMA Coalescing enabled\n");
3053
3054         } else if (hw->mac.type == e1000_82580) {
3055                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3056                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3057                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3058                     reg & ~E1000_PCIEMISC_LX_DECISION);
3059         }
3060
3061 reset_out:
3062         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3063         e1000_get_phy_info(hw);
3064         e1000_check_for_link(hw);
3065         return;
3066 }
3067
3068 /*********************************************************************
3069  *
3070  *  Setup networking device structure and register an interface.
3071  *
3072  **********************************************************************/
3073 static int
3074 igb_setup_interface(device_t dev, struct adapter *adapter)
3075 {
3076         struct ifnet   *ifp;
3077
3078         INIT_DEBUGOUT("igb_setup_interface: begin");
3079
3080         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3081         if (ifp == NULL) {
3082                 device_printf(dev, "can not allocate ifnet structure\n");
3083                 return (-1);
3084         }
3085         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3086         ifp->if_init =  igb_init;
3087         ifp->if_softc = adapter;
3088         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3089         ifp->if_ioctl = igb_ioctl;
3090 #if __FreeBSD_version >= 800000
3091         ifp->if_transmit = igb_mq_start;
3092         ifp->if_qflush = igb_qflush;
3093 #else
3094         ifp->if_start = igb_start;
3095         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3096         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3097         IFQ_SET_READY(&ifp->if_snd);
3098 #endif
3099
3100         ether_ifattach(ifp, adapter->hw.mac.addr);
3101
3102         ifp->if_capabilities = ifp->if_capenable = 0;
3103
3104         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3105         ifp->if_capabilities |= IFCAP_TSO4;
3106         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3107         ifp->if_capenable = ifp->if_capabilities;
3108
3109         /* Advertise LRO capability, but leave it disabled by default */
3110         ifp->if_capabilities |= IFCAP_LRO;
3111
3112 #ifdef DEVICE_POLLING
3113         ifp->if_capabilities |= IFCAP_POLLING;
3114 #endif
3115
3116         /*
3117          * Tell the upper layer(s) we
3118          * support full VLAN capability.
3119          */
3120         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3121         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3122                              |  IFCAP_VLAN_HWTSO
3123                              |  IFCAP_VLAN_MTU;
3124         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3125                           |  IFCAP_VLAN_HWTSO
3126                           |  IFCAP_VLAN_MTU;
3127
3128         /*
3129         ** Don't turn this on by default: if vlans are
3130         ** created on another pseudo device (e.g. lagg)
3131         ** then vlan events are not passed thru, breaking
3132         ** operation; with HW FILTER off it works. If
3133         ** using vlans directly on the igb driver you can
3134         ** enable this to get full hardware tag filtering.
3135         */
3136         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
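        /*
         * Example usage (runtime, illustrative): the capability can be
         * toggled per-interface with ifconfig(8), e.g.
         * "ifconfig igb0 vlanhwfilter" to enable hardware tag
         * filtering, and "ifconfig igb0 -vlanhwfilter" to back it off
         * when vlans live on a pseudo device such as lagg(4).
         */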
3137
3138         /*
3139          * Specify the media types supported by this adapter and register
3140          * callbacks to update media and link information
3141          */
3142         ifmedia_init(&adapter->media, IFM_IMASK,
3143             igb_media_change, igb_media_status);
3144         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3145             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3146                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3147                             0, NULL);
3148                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3149         } else {
3150                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3151                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3152                             0, NULL);
3153                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3154                             0, NULL);
3155                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3156                             0, NULL);
3157                 if (adapter->hw.phy.type != e1000_phy_ife) {
3158                         ifmedia_add(&adapter->media,
3159                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3160                         ifmedia_add(&adapter->media,
3161                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3162                 }
3163         }
3164         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3165         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3166         return (0);
3167 }
3168
3169
3170 /*
3171  * Manage DMA'able memory.
3172  */
3173 static void
3174 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3175 {
3176         if (error)
3177                 return;
3178         *(bus_addr_t *) arg = segs[0].ds_addr;
3179 }
3180
3181 static int
3182 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3183         struct igb_dma_alloc *dma, int mapflags)
3184 {
3185         int error;
3186
3187         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3188                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3189                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3190                                 BUS_SPACE_MAXADDR,      /* highaddr */
3191                                 NULL, NULL,             /* filter, filterarg */
3192                                 size,                   /* maxsize */
3193                                 1,                      /* nsegments */
3194                                 size,                   /* maxsegsize */
3195                                 0,                      /* flags */
3196                                 NULL,                   /* lockfunc */
3197                                 NULL,                   /* lockarg */
3198                                 &dma->dma_tag);
3199         if (error) {
3200                 device_printf(adapter->dev,
3201                     "%s: bus_dma_tag_create failed: %d\n",
3202                     __func__, error);
3203                 goto fail_0;
3204         }
3205
3206         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3207             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3208         if (error) {
3209                 device_printf(adapter->dev,
3210                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3211                     __func__, (uintmax_t)size, error);
3212                 goto fail_2;
3213         }
3214
3215         dma->dma_paddr = 0;
3216         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3217             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3218         if (error || dma->dma_paddr == 0) {
3219                 device_printf(adapter->dev,
3220                     "%s: bus_dmamap_load failed: %d\n",
3221                     __func__, error);
3222                 goto fail_3;
3223         }
3224
3225         return (0);
3226
3227 fail_3:
3228         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3229 fail_2:
3230         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3231         bus_dma_tag_destroy(dma->dma_tag);
3232 fail_0:
3233         dma->dma_map = NULL;
3234         dma->dma_tag = NULL;
3235
3236         return (error);
3237 }
3238
3239 static void
3240 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3241 {
3242         if (dma->dma_tag == NULL)
3243                 return;
3244         if (dma->dma_map != NULL) {
3245                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3246                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3247                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3248                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3249                 dma->dma_map = NULL;
3250         }
3251         bus_dma_tag_destroy(dma->dma_tag);
3252         dma->dma_tag = NULL;
3253 }
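/*
 * Illustrative usage sketch (not part of the driver; the function and
 * "example_ring" names are hypothetical): the malloc/free pair above
 * is the standard pattern for any DMA-visible ring in this driver.
 */
#if 0
static int
igb_example_ring_alloc(struct adapter *adapter)
{
	struct igb_dma_alloc example_ring;
	bus_size_t size;
	int error;

	/* One page worth of descriptors, aligned per IGB_DBA_ALIGN. */
	size = roundup2(PAGE_SIZE, IGB_DBA_ALIGN);
	error = igb_dma_malloc(adapter, size, &example_ring, BUS_DMA_NOWAIT);
	if (error)
		return (error);
	/* example_ring.dma_vaddr and dma_paddr are now valid... */
	igb_dma_free(adapter, &example_ring);
	return (0);
}
#endif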
3254
3255
3256 /*********************************************************************
3257  *
3258  *  Allocate memory for the transmit and receive rings, and then
3259  *  the descriptors associated with each, called only once at attach.
3260  *
3261  **********************************************************************/
3262 static int
3263 igb_allocate_queues(struct adapter *adapter)
3264 {
3265         device_t dev = adapter->dev;
3266         struct igb_queue        *que = NULL;
3267         struct tx_ring          *txr = NULL;
3268         struct rx_ring          *rxr = NULL;
3269         int rsize, tsize, error = E1000_SUCCESS;
3270         int txconf = 0, rxconf = 0;
3271
3272         /* First allocate the top level queue structs */
3273         if (!(adapter->queues =
3274             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3275             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3276                 device_printf(dev, "Unable to allocate queue memory\n");
3277                 error = ENOMEM;
3278                 goto fail;
3279         }
3280
3281         /* Next allocate the TX ring struct memory */
3282         if (!(adapter->tx_rings =
3283             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3284             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3285                 device_printf(dev, "Unable to allocate TX ring memory\n");
3286                 error = ENOMEM;
3287                 goto tx_fail;
3288         }
3289
3290         /* Now allocate the RX */
3291         if (!(adapter->rx_rings =
3292             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3293             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3294                 device_printf(dev, "Unable to allocate RX ring memory\n");
3295                 error = ENOMEM;
3296                 goto rx_fail;
3297         }
3298
3299         tsize = roundup2(adapter->num_tx_desc *
3300             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3301         /*
3302          * Now set up the TX queues, txconf is needed to handle the
3303          * possibility that things fail midcourse and we need to
3304          * undo the allocations gracefully.
3305          */ 
3306         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3307                 /* Set up some basics */
3308                 txr = &adapter->tx_rings[i];
3309                 txr->adapter = adapter;
3310                 txr->me = i;
3311
3312                 /* Initialize the TX lock */
3313                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3314                     device_get_nameunit(dev), txr->me);
3315                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3316
3317                 if (igb_dma_malloc(adapter, tsize,
3318                         &txr->txdma, BUS_DMA_NOWAIT)) {
3319                         device_printf(dev,
3320                             "Unable to allocate TX Descriptor memory\n");
3321                         error = ENOMEM;
3322                         goto err_tx_desc;
3323                 }
3324                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3325                 bzero((void *)txr->tx_base, tsize);
3326
3327                 /* Now allocate transmit buffers for the ring */
3328                 if (igb_allocate_transmit_buffers(txr)) {
3329                         device_printf(dev,
3330                             "Critical Failure setting up transmit buffers\n");
3331                         error = ENOMEM;
3332                         goto err_tx_desc;
3333                 }
3334 #if __FreeBSD_version >= 800000
3335                 /* Allocate a buf ring */
3336                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3337                     M_WAITOK, &txr->tx_mtx);
3338 #endif
3339         }
3340
3341         /*
3342          * Next the RX queues...
3343          */ 
3344         rsize = roundup2(adapter->num_rx_desc *
3345             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3346         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3347                 rxr = &adapter->rx_rings[i];
3348                 rxr->adapter = adapter;
3349                 rxr->me = i;
3350
3351                 /* Initialize the RX lock */
3352                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3353                     device_get_nameunit(dev), rxr->me);
3354                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3355
3356                 if (igb_dma_malloc(adapter, rsize,
3357                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3358                         device_printf(dev,
3359                             "Unable to allocate RX Descriptor memory\n");
3360                         error = ENOMEM;
3361                         goto err_rx_desc;
3362                 }
3363                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3364                 bzero((void *)rxr->rx_base, rsize);
3365
3366                 /* Allocate receive buffers for the ring*/
3367                 if (igb_allocate_receive_buffers(rxr)) {
3368                         device_printf(dev,
3369                             "Critical Failure setting up receive buffers\n");
3370                         error = ENOMEM;
3371                         goto err_rx_desc;
3372                 }
3373         }
3374
3375         /*
3376         ** Finally set up the queue holding structs
3377         */
3378         for (int i = 0; i < adapter->num_queues; i++) {
3379                 que = &adapter->queues[i];
3380                 que->adapter = adapter;
3381                 que->txr = &adapter->tx_rings[i];
3382                 que->rxr = &adapter->rx_rings[i];
3383         }
3384
3385         return (0);
3386
3387 err_rx_desc:
3388         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3389                 igb_dma_free(adapter, &rxr->rxdma);
3390 err_tx_desc:
3391         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3392                 igb_dma_free(adapter, &txr->txdma);
3393         free(adapter->rx_rings, M_DEVBUF);
3394 rx_fail:
3395 #if __FreeBSD_version >= 800000
3396         buf_ring_free(txr->br, M_DEVBUF);
3397 #endif
3398         free(adapter->tx_rings, M_DEVBUF);
3399 tx_fail:
3400         free(adapter->queues, M_DEVBUF);
3401 fail:
3402         return (error);
3403 }
3404
3405 /*********************************************************************
3406  *
3407  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3408  *  the information needed to transmit a packet on the wire. This is
3409  *  called only once at attach, setup is done every reset.
3410  *
3411  **********************************************************************/
3412 static int
3413 igb_allocate_transmit_buffers(struct tx_ring *txr)
3414 {
3415         struct adapter *adapter = txr->adapter;
3416         device_t dev = adapter->dev;
3417         struct igb_tx_buffer *txbuf;
3418         int error, i;
3419
3420         /*
3421          * Setup DMA descriptor areas.
3422          */
3423         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3424                                1, 0,                    /* alignment, bounds */
3425                                BUS_SPACE_MAXADDR,       /* lowaddr */
3426                                BUS_SPACE_MAXADDR,       /* highaddr */
3427                                NULL, NULL,              /* filter, filterarg */
3428                                IGB_TSO_SIZE,            /* maxsize */
3429                                IGB_MAX_SCATTER,         /* nsegments */
3430                                PAGE_SIZE,               /* maxsegsize */
3431                                0,                       /* flags */
3432                                NULL,                    /* lockfunc */
3433                                NULL,                    /* lockfuncarg */
3434                                &txr->txtag))) {
3435                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3436                 goto fail;
3437         }
3438
3439         if (!(txr->tx_buffers =
3440             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3441             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3442                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3443                 error = ENOMEM;
3444                 goto fail;
3445         }
3446
3447         /* Create the descriptor buffer dma maps */
3448         txbuf = txr->tx_buffers;
3449         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3450                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3451                 if (error != 0) {
3452                         device_printf(dev, "Unable to create TX DMA map\n");
3453                         goto fail;
3454                 }
3455         }
3456
3457         return (0);
3458 fail:
3459         /* Free everything; this handles a failure partway through setup */
3460         igb_free_transmit_structures(adapter);
3461         return (error);
3462 }
3463
3464 /*********************************************************************
3465  *
3466  *  Initialize a transmit ring.
3467  *
3468  **********************************************************************/
3469 static void
3470 igb_setup_transmit_ring(struct tx_ring *txr)
3471 {
3472         struct adapter *adapter = txr->adapter;
3473         struct igb_tx_buffer *txbuf;
3474         int i;
3475 #ifdef DEV_NETMAP
3476         struct netmap_adapter *na = NA(adapter->ifp);
3477         struct netmap_slot *slot;
3478 #endif /* DEV_NETMAP */
3479
3480         /* Clear the old descriptor contents */
3481         IGB_TX_LOCK(txr);
3482 #ifdef DEV_NETMAP
3483         slot = netmap_reset(na, NR_TX, txr->me, 0);
3484 #endif /* DEV_NETMAP */
3485         bzero((void *)txr->tx_base,
3486               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3487         /* Reset indices */
3488         txr->next_avail_desc = 0;
3489         txr->next_to_clean = 0;
3490
3491         /* Free any existing tx buffers. */
3492         txbuf = txr->tx_buffers;
3493         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3494                 if (txbuf->m_head != NULL) {
3495                         bus_dmamap_sync(txr->txtag, txbuf->map,
3496                             BUS_DMASYNC_POSTWRITE);
3497                         bus_dmamap_unload(txr->txtag, txbuf->map);
3498                         m_freem(txbuf->m_head);
3499                         txbuf->m_head = NULL;
3500                 }
3501 #ifdef DEV_NETMAP
3502                 if (slot) {
3503                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3504                         /* no need to set the address */
3505                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3506                 }
3507 #endif /* DEV_NETMAP */
3508                 /* clear the watch index */
3509                 txbuf->next_eop = -1;
3510         }
3511
3512         /* Set number of descriptors available */
3513         txr->tx_avail = adapter->num_tx_desc;
3514
3515         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3516             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3517         IGB_TX_UNLOCK(txr);
3518 }
3519
3520 /*********************************************************************
3521  *
3522  *  Initialize all transmit rings.
3523  *
3524  **********************************************************************/
3525 static void
3526 igb_setup_transmit_structures(struct adapter *adapter)
3527 {
3528         struct tx_ring *txr = adapter->tx_rings;
3529
3530         for (int i = 0; i < adapter->num_queues; i++, txr++)
3531                 igb_setup_transmit_ring(txr);
3532
3533         return;
3534 }
3535
3536 /*********************************************************************
3537  *
3538  *  Enable transmit unit.
3539  *
3540  **********************************************************************/
3541 static void
3542 igb_initialize_transmit_units(struct adapter *adapter)
3543 {
3544         struct tx_ring  *txr = adapter->tx_rings;
3545         struct e1000_hw *hw = &adapter->hw;
3546         u32             tctl, txdctl;
3547
3548         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3549         tctl = txdctl = 0;
3550
3551         /* Setup the Tx Descriptor Rings */
3552         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3553                 u64 bus_addr = txr->txdma.dma_paddr;
3554
3555                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3556                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3557                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3558                     (uint32_t)(bus_addr >> 32));
3559                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3560                     (uint32_t)bus_addr);
3561
3562                 /* Setup the HW Tx Head and Tail descriptor pointers */
3563                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3564                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3565
3566                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3567                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3568                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3569
3570                 txr->queue_status = IGB_QUEUE_IDLE;
3571
3572                 txdctl |= IGB_TX_PTHRESH;
3573                 txdctl |= IGB_TX_HTHRESH << 8;
3574                 txdctl |= IGB_TX_WTHRESH << 16;
3575                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3576                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3577         }
3578
3579         if (adapter->vf_ifp)
3580                 return;
3581
3582         e1000_config_collision_dist(hw);
3583
3584         /* Program the Transmit Control Register */
3585         tctl = E1000_READ_REG(hw, E1000_TCTL);
3586         tctl &= ~E1000_TCTL_CT;
3587         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3588                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3589
3590         /* This write will effectively turn on the transmit unit. */
3591         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3592 }
3593
3594 /*********************************************************************
3595  *
3596  *  Free all transmit rings.
3597  *
3598  **********************************************************************/
3599 static void
3600 igb_free_transmit_structures(struct adapter *adapter)
3601 {
3602         struct tx_ring *txr = adapter->tx_rings;
3603
3604         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3605                 IGB_TX_LOCK(txr);
3606                 igb_free_transmit_buffers(txr);
3607                 igb_dma_free(adapter, &txr->txdma);
3608                 IGB_TX_UNLOCK(txr);
3609                 IGB_TX_LOCK_DESTROY(txr);
3610         }
3611         free(adapter->tx_rings, M_DEVBUF);
3612 }
3613
3614 /*********************************************************************
3615  *
3616  *  Free transmit ring related data structures.
3617  *
3618  **********************************************************************/
3619 static void
3620 igb_free_transmit_buffers(struct tx_ring *txr)
3621 {
3622         struct adapter *adapter = txr->adapter;
3623         struct igb_tx_buffer *tx_buffer;
3624         int             i;
3625
3626         INIT_DEBUGOUT("free_transmit_ring: begin");
3627
3628         if (txr->tx_buffers == NULL)
3629                 return;
3630
3631         tx_buffer = txr->tx_buffers;
3632         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3633                 if (tx_buffer->m_head != NULL) {
3634                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3635                             BUS_DMASYNC_POSTWRITE);
3636                         bus_dmamap_unload(txr->txtag,
3637                             tx_buffer->map);
3638                         m_freem(tx_buffer->m_head);
3639                         tx_buffer->m_head = NULL;
3640                         if (tx_buffer->map != NULL) {
3641                                 bus_dmamap_destroy(txr->txtag,
3642                                     tx_buffer->map);
3643                                 tx_buffer->map = NULL;
3644                         }
3645                 } else if (tx_buffer->map != NULL) {
3646                         bus_dmamap_unload(txr->txtag,
3647                             tx_buffer->map);
3648                         bus_dmamap_destroy(txr->txtag,
3649                             tx_buffer->map);
3650                         tx_buffer->map = NULL;
3651                 }
3652         }
3653 #if __FreeBSD_version >= 800000
3654         if (txr->br != NULL)
3655                 buf_ring_free(txr->br, M_DEVBUF);
3656 #endif
3657         if (txr->tx_buffers != NULL) {
3658                 free(txr->tx_buffers, M_DEVBUF);
3659                 txr->tx_buffers = NULL;
3660         }
3661         if (txr->txtag != NULL) {
3662                 bus_dma_tag_destroy(txr->txtag);
3663                 txr->txtag = NULL;
3664         }
3665         return;
3666 }
3667
3668 /**********************************************************************
3669  *
3670  *  Setup work for hardware segmentation offload (TSO)
3671  *
3672  **********************************************************************/
3673 static bool
3674 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3675         struct ip *ip, struct tcphdr *th)
3676 {
3677         struct adapter *adapter = txr->adapter;
3678         struct e1000_adv_tx_context_desc *TXD;
3679         struct igb_tx_buffer        *tx_buffer;
3680         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3681         u32 mss_l4len_idx = 0;
3682         u16 vtag = 0;
3683         int ctxd, ip_hlen, tcp_hlen;
3684
3685         ctxd = txr->next_avail_desc;
3686         tx_buffer = &txr->tx_buffers[ctxd];
3687         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3688
3689         ip->ip_sum = 0;
3690         ip_hlen = ip->ip_hl << 2;
3691         tcp_hlen = th->th_off << 2;
3692
3693         /* VLAN MACLEN IPLEN */
3694         if (mp->m_flags & M_VLANTAG) {
3695                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3696                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3697         }
3698
3699         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3700         vlan_macip_lens |= ip_hlen;
3701         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3702
3703         /* ADV DTYPE TUCMD */
3704         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3705         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3706         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3707         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3708
3709         /* MSS L4LEN IDX */
3710         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3711         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3712         /* 82575 needs the queue index added */
3713         if (adapter->hw.mac.type == e1000_82575)
3714                 mss_l4len_idx |= txr->me << 4;
3715         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3716
3717         TXD->seqnum_seed = htole32(0);
3718         tx_buffer->m_head = NULL;
3719         tx_buffer->next_eop = -1;
3720
3721         if (++ctxd == adapter->num_tx_desc)
3722                 ctxd = 0;
3723
3724         txr->tx_avail--;
3725         txr->next_avail_desc = ctxd;
3726         return (TRUE);
3727 }
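/*
 * Worked example for the MSS/L4LEN packing above (illustrative,
 * assuming the customary shift values of 16 for MSS and 8 for L4LEN):
 * a 1448-byte MSS with a 20-byte TCP header yields
 * (1448 << 16) | (20 << 8) = 0x05A81400; on 82575 the queue index is
 * also OR'd in, shifted left by 4.
 */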
3728
3729
3730 /*********************************************************************
3731  *
3732  *  Context Descriptor setup for VLAN or CSUM
3733  *
3734  **********************************************************************/
3735
3736 static bool
3737 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3738 {
3739         struct adapter *adapter = txr->adapter;
3740         struct e1000_adv_tx_context_desc *TXD;
3741         struct igb_tx_buffer        *tx_buffer;
3742         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3743         struct ether_vlan_header *eh;
3744         struct ip *ip = NULL;
3745         struct ip6_hdr *ip6;
3746         int  ehdrlen, ctxd, ip_hlen = 0;
3747         u16     etype, vtag = 0;
3748         u8      ipproto = 0;
3749         bool    offload = TRUE;
3750
3751         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3752                 offload = FALSE;
3753
3754         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3755         ctxd = txr->next_avail_desc;
3756         tx_buffer = &txr->tx_buffers[ctxd];
3757         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3758
3759         /*
3760         ** In advanced descriptors the vlan tag must 
3761         ** be placed into the context descriptor, thus
3762         ** we need to be here just for that setup.
3763         */
3764         if (mp->m_flags & M_VLANTAG) {
3765                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3766                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3767         } else if (offload == FALSE)
3768                 return FALSE;
3769
3770         /*
3771          * Determine where frame payload starts.
3772          * Jump over vlan headers if already present,
3773          * helpful for QinQ too.
3774          */
3775         eh = mtod(mp, struct ether_vlan_header *);
3776         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3777                 etype = ntohs(eh->evl_proto);
3778                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3779         } else {
3780                 etype = ntohs(eh->evl_encap_proto);
3781                 ehdrlen = ETHER_HDR_LEN;
3782         }
3783
3784         /* Set the ether header length */
3785         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3786
3787         switch (etype) {
3788                 case ETHERTYPE_IP:
3789                         ip = (struct ip *)(mp->m_data + ehdrlen);
3790                         ip_hlen = ip->ip_hl << 2;
3791                         if (mp->m_len < ehdrlen + ip_hlen) {
3792                                 offload = FALSE;
3793                                 break;
3794                         }
3795                         ipproto = ip->ip_p;
3796                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3797                         break;
3798                 case ETHERTYPE_IPV6:
3799                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3800                         ip_hlen = sizeof(struct ip6_hdr);
3801                         ipproto = ip6->ip6_nxt;
3802                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3803                         break;
3804                 default:
3805                         offload = FALSE;
3806                         break;
3807         }
3808
3809         vlan_macip_lens |= ip_hlen;
3810         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3811
3812         switch (ipproto) {
3813                 case IPPROTO_TCP:
3814                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3815                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3816                         break;
3817                 case IPPROTO_UDP:
3818                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3819                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3820                         break;
3821 #if __FreeBSD_version >= 800000
3822                 case IPPROTO_SCTP:
3823                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3824                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3825                         break;
3826 #endif
3827                 default:
3828                         offload = FALSE;
3829                         break;
3830         }
3831
3832         /* 82575 needs the queue index added */
3833         if (adapter->hw.mac.type == e1000_82575)
3834                 mss_l4len_idx = txr->me << 4;
3835
3836         /* Now copy bits into descriptor */
3837         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3838         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3839         TXD->seqnum_seed = htole32(0);
3840         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3841
3842         tx_buffer->m_head = NULL;
3843         tx_buffer->next_eop = -1;
3844
3845         /* We've consumed the first desc, adjust counters */
3846         if (++ctxd == adapter->num_tx_desc)
3847                 ctxd = 0;
3848         txr->next_avail_desc = ctxd;
3849         --txr->tx_avail;
3850
3851         return (offload);
3852 }
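/*
 * E.g. (illustrative, assuming the customary MACLEN shift of 9): for a
 * single-tagged frame the encapsulated ethertype sits after the 4-byte
 * 802.1Q shim, so ehdrlen above becomes ETHER_HDR_LEN +
 * ETHER_VLAN_ENCAP_LEN = 14 + 4 = 18; an untagged IPv4 frame with a
 * 20-byte IP header gets vlan_macip_lens = (14 << 9) | 20 = 0x1C14.
 */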
3853
3854
3855 /**********************************************************************
3856  *
3857  *  Examine each tx_buffer in the used queue. If the hardware is done
3858  *  processing the packet then free associated resources. The
3859  *  tx_buffer is put back on the free queue.
3860  *
3861  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3862  **********************************************************************/
3863 static bool
3864 igb_txeof(struct tx_ring *txr)
3865 {
3866         struct adapter  *adapter = txr->adapter;
3867         int first, last, done, processed;
3868         struct igb_tx_buffer *tx_buffer;
3869         struct e1000_tx_desc   *tx_desc, *eop_desc;
3870         struct ifnet   *ifp = adapter->ifp;
3871
3872         IGB_TX_LOCK_ASSERT(txr);
3873
3874 #ifdef DEV_NETMAP
3875         if (netmap_tx_irq(ifp, txr->me |
3876             (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
3877                 return (FALSE);
3878 #endif /* DEV_NETMAP */
3879         if (txr->tx_avail == adapter->num_tx_desc) {
3880                 txr->queue_status = IGB_QUEUE_IDLE;
3881                 return (FALSE);
3882         }
3883
3884         processed = 0;
3885         first = txr->next_to_clean;
3886         tx_desc = &txr->tx_base[first];
3887         tx_buffer = &txr->tx_buffers[first];
3888         last = tx_buffer->next_eop;
3889         eop_desc = &txr->tx_base[last];
3890
3891         /*
3892          * Get the index of the first descriptor
3893          * AFTER the EOP of the first packet, so
3894          * the inner while loop can use a simple
3895          * inequality comparison.
3896          */
3897         if (++last == adapter->num_tx_desc)
3898                 last = 0;
3899         done = last;
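        /*
         * E.g. (illustrative): with 1024 descriptors, if the EOP of
         * the first packet sits at index 1023, "done" wraps to 0 and
         * the inner loop below cleans until "first" catches up to it.
         */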
3900
3901         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3902             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3903
3904         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3905                 /* We clean the range of the packet */
3906                 while (first != done) {
3907                         tx_desc->upper.data = 0;
3908                         tx_desc->lower.data = 0;
3909                         tx_desc->buffer_addr = 0;
3910                         ++txr->tx_avail;
3911                         ++processed;
3912
3913                         if (tx_buffer->m_head) {
3914                                 txr->bytes +=
3915                                     tx_buffer->m_head->m_pkthdr.len;
3916                                 bus_dmamap_sync(txr->txtag,
3917                                     tx_buffer->map,
3918                                     BUS_DMASYNC_POSTWRITE);
3919                                 bus_dmamap_unload(txr->txtag,
3920                                     tx_buffer->map);
3921
3922                                 m_freem(tx_buffer->m_head);
3923                                 tx_buffer->m_head = NULL;
3924                         }
3925                         tx_buffer->next_eop = -1;
3926                         txr->watchdog_time = ticks;
3927
3928                         if (++first == adapter->num_tx_desc)
3929                                 first = 0;
3930
3931                         tx_buffer = &txr->tx_buffers[first];
3932                         tx_desc = &txr->tx_base[first];
3933                 }
3934                 ++txr->packets;
3935                 ++ifp->if_opackets;
3936                 /* See if we can continue to the next packet */
3937                 last = tx_buffer->next_eop;
3938                 if (last != -1) {
3939                         eop_desc = &txr->tx_base[last];
3940                         /* Get new done point */
3941                         if (++last == adapter->num_tx_desc) last = 0;
3942                         done = last;
3943                 } else
3944                         break;
3945         }
3946         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3947             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3948
3949         txr->next_to_clean = first;
3950
3951         /*
3952         ** Watchdog calculation, we know there's
3953         ** work outstanding or the first return
3954         ** would have been taken, so none processed
3955         ** for too long indicates a hang.
3956         */
3957         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3958                 txr->queue_status |= IGB_QUEUE_HUNG;
3959         /*
3960          * If we have a minimum free,
3961          * clear depleted state bit
3962          */
3963         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)          
3964                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3965
3966         /* All clean, turn off the watchdog */
3967         if (txr->tx_avail == adapter->num_tx_desc) {
3968                 txr->queue_status = IGB_QUEUE_IDLE;
3969                 return (FALSE);
3970         }
3971
3972         return (TRUE);
3973 }
3974
3975 /*********************************************************************
3976  *
3977  *  Refresh mbuf buffers for RX descriptor rings
3978  *   - now keeps its own state so discards due to resource
3979  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3980  *     the routine just returns, keeping its placeholder, so it can
3981  *     simply be called again to try later.
3982  *
3983  **********************************************************************/
3984 static void
3985 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3986 {
3987         struct adapter          *adapter = rxr->adapter;
3988         bus_dma_segment_t       hseg[1];
3989         bus_dma_segment_t       pseg[1];
3990         struct igb_rx_buf       *rxbuf;
3991         struct mbuf             *mh, *mp;
3992         int                     i, j, nsegs, error;
3993         bool                    refreshed = FALSE;
3994
3995         i = j = rxr->next_to_refresh;
3996         /*
3997         ** Get one descriptor beyond
3998         ** our work mark to control
3999         ** the loop.
4000         */
4001         if (++j == adapter->num_rx_desc)
4002                 j = 0;
4003
4004         while (j != limit) {
4005                 rxbuf = &rxr->rx_buffers[i];
4006                 /* No hdr mbuf used with header split off */
4007                 if (rxr->hdr_split == FALSE)
4008                         goto no_split;
4009                 if (rxbuf->m_head == NULL) {
4010                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4011                         if (mh == NULL)
4012                                 goto update;
4013                 } else
4014                         mh = rxbuf->m_head;
4015
4016                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4018                 mh->m_flags |= M_PKTHDR;
4019                 /* Get the memory mapping */
4020                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4021                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4022                 if (error != 0) {
4023                         printf("Refresh mbufs: hdr dmamap load"
4024                             " failure - %d\n", error);
4025                         m_free(mh);
4026                         rxbuf->m_head = NULL;
4027                         goto update;
4028                 }
4029                 rxbuf->m_head = mh;
4030                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4031                     BUS_DMASYNC_PREREAD);
4032                 rxr->rx_base[i].read.hdr_addr =
4033                     htole64(hseg[0].ds_addr);
4034 no_split:
4035                 if (rxbuf->m_pack == NULL) {
4036                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4037                             M_PKTHDR, adapter->rx_mbuf_sz);
4038                         if (mp == NULL)
4039                                 goto update;
4040                 } else
4041                         mp = rxbuf->m_pack;
4042
4043                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4044                 /* Get the memory mapping */
4045                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4046                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4047                 if (error != 0) {
4048                         printf("Refresh mbufs: payload dmamap load"
4049                             " failure - %d\n", error);
4050                         m_free(mp);
4051                         rxbuf->m_pack = NULL;
4052                         goto update;
4053                 }
4054                 rxbuf->m_pack = mp;
4055                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4056                     BUS_DMASYNC_PREREAD);
4057                 rxr->rx_base[i].read.pkt_addr =
4058                     htole64(pseg[0].ds_addr);
4059                 refreshed = TRUE; /* I feel wefreshed :) */
4060
4061                 i = j; /* our next is precalculated */
4062                 rxr->next_to_refresh = i;
4063                 if (++j == adapter->num_rx_desc)
4064                         j = 0;
4065         }
4066 update:
4067         if (refreshed) /* update tail */
4068                 E1000_WRITE_REG(&adapter->hw,
4069                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4070         return;
4071 }
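/*
 * Illustrative walk of the refresh cursor above: with 512 RX
 * descriptors, next_to_refresh == 510 and limit == 5 restocks
 * i = 510, 511, 0, 1, 2, 3 (j running one slot ahead), leaving
 * next_to_refresh at 4, one past the last restocked slot, which is
 * then written to RDT.  A failed mbuf allocation bails out early with
 * the cursor still pointing at the slot to retry on the next call.
 */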
4072
4073
4074 /*********************************************************************
4075  *
4076  *  Allocate memory for rx_buffer structures. Since we use one
4077  *  rx_buffer per received packet, the maximum number of rx_buffer's
4078  *  that we'll need is equal to the number of receive descriptors
4079  *  that we've allocated.
4080  *
4081  **********************************************************************/
4082 static int
4083 igb_allocate_receive_buffers(struct rx_ring *rxr)
4084 {
4085         struct  adapter         *adapter = rxr->adapter;
4086         device_t                dev = adapter->dev;
4087         struct igb_rx_buf       *rxbuf;
4088         int                     i, bsize, error;
4089
4090         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4091         if (!(rxr->rx_buffers =
4092             (struct igb_rx_buf *) malloc(bsize,
4093             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4094                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4095                 error = ENOMEM;
4096                 goto fail;
4097         }
4098
4099         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4100                                    1, 0,                /* alignment, bounds */
4101                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4102                                    BUS_SPACE_MAXADDR,   /* highaddr */
4103                                    NULL, NULL,          /* filter, filterarg */
4104                                    MSIZE,               /* maxsize */
4105                                    1,                   /* nsegments */
4106                                    MSIZE,               /* maxsegsize */
4107                                    0,                   /* flags */
4108                                    NULL,                /* lockfunc */
4109                                    NULL,                /* lockfuncarg */
4110                                    &rxr->htag))) {
4111                 device_printf(dev, "Unable to create RX DMA tag\n");
4112                 goto fail;
4113         }
4114
4115         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4116                                    1, 0,                /* alignment, bounds */
4117                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4118                                    BUS_SPACE_MAXADDR,   /* highaddr */
4119                                    NULL, NULL,          /* filter, filterarg */
4120                                    MJUM9BYTES,          /* maxsize */
4121                                    1,                   /* nsegments */
4122                                    MJUM9BYTES,          /* maxsegsize */
4123                                    0,                   /* flags */
4124                                    NULL,                /* lockfunc */
4125                                    NULL,                /* lockfuncarg */
4126                                    &rxr->ptag))) {
4127                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4128                 goto fail;
4129         }
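        /*
         * Two tags implement the header-split receive scheme: htag
         * maps the small header buffers (at most MSIZE bytes, one
         * segment each), while ptag maps the payload clusters (up
         * to MJUM9BYTES, i.e. 9KB jumbo clusters).  Each advanced
         * RX descriptor carries one address from each map.
         */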
4130
4131         for (i = 0; i < adapter->num_rx_desc; i++) {
4132                 rxbuf = &rxr->rx_buffers[i];
4133                 error = bus_dmamap_create(rxr->htag,
4134                     BUS_DMA_NOWAIT, &rxbuf->hmap);
4135                 if (error) {
4136                         device_printf(dev,
4137                             "Unable to create RX head DMA maps\n");
4138                         goto fail;
4139                 }
4140                 error = bus_dmamap_create(rxr->ptag,
4141                     BUS_DMA_NOWAIT, &rxbuf->pmap);
4142                 if (error) {
4143                         device_printf(dev,
4144                             "Unable to create RX packet DMA maps\n");
4145                         goto fail;
4146                 }
4147         }
4148
4149         return (0);
4150
4151 fail:
4152         /* Frees all, but can handle partial completion */
4153         igb_free_receive_structures(adapter);
4154         return (error);
4155 }
4156
4157
4158 static void
4159 igb_free_receive_ring(struct rx_ring *rxr)
4160 {
4161         struct  adapter         *adapter = rxr->adapter;
4162         struct igb_rx_buf       *rxbuf;
4163
4164
4165         for (int i = 0; i < adapter->num_rx_desc; i++) {
4166                 rxbuf = &rxr->rx_buffers[i];
4167                 if (rxbuf->m_head != NULL) {
4168                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4169                             BUS_DMASYNC_POSTREAD);
4170                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4171                         rxbuf->m_head->m_flags |= M_PKTHDR;
4172                         m_freem(rxbuf->m_head);
4173                 }
4174                 if (rxbuf->m_pack != NULL) {
4175                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4176                             BUS_DMASYNC_POSTREAD);
4177                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4178                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4179                         m_freem(rxbuf->m_pack);
4180                 }
4181                 rxbuf->m_head = NULL;
4182                 rxbuf->m_pack = NULL;
4183         }
4184 }
4185
4186
4187 /*********************************************************************
4188  *
4189  *  Initialize a receive ring and its buffers.
4190  *
4191  **********************************************************************/
4192 static int
4193 igb_setup_receive_ring(struct rx_ring *rxr)
4194 {
4195         struct  adapter         *adapter;
4196         struct  ifnet           *ifp;
4197         device_t                dev;
4198         struct igb_rx_buf       *rxbuf;
4199         bus_dma_segment_t       pseg[1], hseg[1];
4200         struct lro_ctrl         *lro = &rxr->lro;
4201         int                     rsize, nsegs, error = 0;
4202 #ifdef DEV_NETMAP
4203         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4204         struct netmap_slot *slot;
4205 #endif /* DEV_NETMAP */
4206
4207         adapter = rxr->adapter;
4208         dev = adapter->dev;
4209         ifp = adapter->ifp;
4210
4211         /* Clear the ring contents */
4212         IGB_RX_LOCK(rxr);
4213 #ifdef DEV_NETMAP
4214         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4215 #endif /* DEV_NETMAP */
4216         rsize = roundup2(adapter->num_rx_desc *
4217             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4218         bzero((void *)rxr->rx_base, rsize);
4219
4220         /*
4221         ** Free current RX buffer structures and their mbufs
4222         */
4223         igb_free_receive_ring(rxr);
4224
4225         /* Configure for header split? */
4226         if (igb_header_split)
4227                 rxr->hdr_split = TRUE;
4228
4229         /* Now replenish the ring mbufs */
4230         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4231                 struct mbuf     *mh, *mp;
4232
4233                 rxbuf = &rxr->rx_buffers[j];
4234 #ifdef DEV_NETMAP
4235                 if (slot) {
4236                         /* slot sj is mapped to the j-th NIC-ring entry */
4237                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
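                        /*
                         * netmap_idx_n2k() translates the NIC ring
                         * index j into the matching netmap slot,
                         * applying the ring's current offset modulo
                         * the ring size; with no offset, sj == j.
                         */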
4238                         uint64_t paddr;
4239                         void *addr;
4240
4241                         addr = PNMB(slot + sj, &paddr);
4242                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4243                         /* Update descriptor */
4244                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4245                         continue;
4246                 }
4247 #endif /* DEV_NETMAP */
4248                 if (rxr->hdr_split == FALSE)
4249                         goto skip_head;
4250
4251                 /* First the header */
4252                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4253                 if (rxbuf->m_head == NULL) {
4254                         error = ENOBUFS;
4255                         goto fail;
4256                 }
4257                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4258                 mh = rxbuf->m_head;
4259                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4260                 mh->m_flags |= M_PKTHDR;
4261                 /* Get the memory mapping */
4262                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4263                     rxbuf->hmap, rxbuf->m_head, hseg,
4264                     &nsegs, BUS_DMA_NOWAIT);
4265                 if (error != 0) /* Nothing elegant to do here */
4266                         goto fail;
4267                 bus_dmamap_sync(rxr->htag,
4268                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4269                 /* Update descriptor */
4270                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4271
4272 skip_head:
4273                 /* Now the payload cluster */
4274                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4275                     M_PKTHDR, adapter->rx_mbuf_sz);
4276                 if (rxbuf->m_pack == NULL) {
4277                         error = ENOBUFS;
4278                         goto fail;
4279                 }
4280                 mp = rxbuf->m_pack;
4281                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4282                 /* Get the memory mapping */
4283                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4284                     rxbuf->pmap, mp, pseg,
4285                     &nsegs, BUS_DMA_NOWAIT);
4286                 if (error != 0)
4287                         goto fail;
4288                 bus_dmamap_sync(rxr->ptag,
4289                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4290                 /* Update descriptor */
4291                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4292         }
4293
4294         /* Setup our descriptor indices */
4295         rxr->next_to_check = 0;
4296         rxr->next_to_refresh = adapter->num_rx_desc - 1;
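        /*
         * next_to_refresh starts one slot behind next_to_check
         * (modulo the ring size): the tail register is written with
         * next_to_refresh, so this hands the hardware all but one
         * descriptor.  Keeping one slot in reserve is the usual way
         * to distinguish a completely full ring from an empty one.
         */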
4297         rxr->lro_enabled = FALSE;
4298         rxr->rx_split_packets = 0;
4299         rxr->rx_bytes = 0;
4300
4301         rxr->fmp = NULL;
4302         rxr->lmp = NULL;
4303         rxr->discard = FALSE;
4304
4305         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4306             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4307
4308         /*
4309         ** Now set up the LRO interface; we
4310         ** also only do header split when LRO
4311         ** is enabled, since header split is
4312         ** usually undesirable without it.
4313         */
4314         if (ifp->if_capenable & IFCAP_LRO) {
4315                 error = tcp_lro_init(lro);
4316                 if (error) {
4317                         device_printf(dev, "LRO Initialization failed!\n");
4318                         goto fail;
4319                 }
4320                 INIT_DEBUGOUT("RX LRO Initialized\n");
4321                 rxr->lro_enabled = TRUE;
4322                 lro->ifp = adapter->ifp;
4323         }
4324
4325         IGB_RX_UNLOCK(rxr);
4326         return (0);
4327
4328 fail:
4329         igb_free_receive_ring(rxr);
4330         IGB_RX_UNLOCK(rxr);
4331         return (error);
4332 }
4333
4334
4335 /*********************************************************************
4336  *
4337  *  Initialize all receive rings.
4338  *
4339  **********************************************************************/
4340 static int
4341 igb_setup_receive_structures(struct adapter *adapter)
4342 {
4343         struct rx_ring *rxr = adapter->rx_rings;
4344         int i;
4345
4346         for (i = 0; i < adapter->num_queues; i++, rxr++)
4347                 if (igb_setup_receive_ring(rxr))
4348                         goto fail;
4349
4350         return (0);
4351 fail:
4352         /*
4353          * Free RX buffers allocated so far, we will only handle
4354          * the rings that completed, the failing case will have
4355          * cleaned up for itself. 'i' is the endpoint.
4356          */
4357         for (int j = 0; j < i; ++j) {
4358                 rxr = &adapter->rx_rings[j];
4359                 IGB_RX_LOCK(rxr);
4360                 igb_free_receive_ring(rxr);
4361                 IGB_RX_UNLOCK(rxr);
4362         }
4363
4364         return (ENOBUFS);
4365 }
4366
4367 /*********************************************************************
4368  *
4369  *  Enable receive unit.
4370  *
4371  **********************************************************************/
4372 static void
4373 igb_initialize_receive_units(struct adapter *adapter)
4374 {
4375         struct rx_ring  *rxr = adapter->rx_rings;
4376         struct ifnet    *ifp = adapter->ifp;
4377         struct e1000_hw *hw = &adapter->hw;
4378         u32             rctl, rxcsum, psize, srrctl = 0;
4379
4380         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4381
4382         /*
4383          * Make sure receives are disabled while setting
4384          * up the descriptor ring
4385          */
4386         rctl = E1000_READ_REG(hw, E1000_RCTL);
4387         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4388
4389         /*
4390         ** Set up for header split
4391         */
4392         if (igb_header_split) {
4393                 /* Use a standard mbuf for the header */
4394                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4395                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4396         } else
4397                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4398
4399         /*
4400         ** Set up for jumbo frames
4401         */
4402         if (ifp->if_mtu > ETHERMTU) {
4403                 rctl |= E1000_RCTL_LPE;
4404                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4405                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4406                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4407                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4408                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4409                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4410                 }
4411                 /* Set maximum packet len */
4412                 psize = adapter->max_frame_size;
4413                 /* are we on a vlan? */
4414                 if (adapter->ifp->if_vlantrunk != NULL)
4415                         psize += VLAN_TAG_SIZE;
4416                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4417         } else {
4418                 rctl &= ~E1000_RCTL_LPE;
4419                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4420                 rctl |= E1000_RCTL_SZ_2048;
4421         }
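        /*
         * A worked example of the SRRCTL encoding, assuming the
         * BSIZEPKT field counts buffer size in 1KB units (a shift
         * of 10): 2048 >> 10 = 2 programs a 2KB buffer, 4096 >> 10
         * = 4 a 4KB one, and 8192 >> 10 = 8 an 8KB one.
         */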
4422
4423         /* Setup the Base and Length of the Rx Descriptor Rings */
4424         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4425                 u64 bus_addr = rxr->rxdma.dma_paddr;
4426                 u32 rxdctl;
4427
4428                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4429                     adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc));
4430                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4431                     (uint32_t)(bus_addr >> 32));
4432                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4433                     (uint32_t)bus_addr);
4434                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4435                 /* Enable this Queue */
4436                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4437                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4438                 rxdctl &= 0xFFF00000;
4439                 rxdctl |= IGB_RX_PTHRESH;
4440                 rxdctl |= IGB_RX_HTHRESH << 8;
4441                 rxdctl |= IGB_RX_WTHRESH << 16;
4442                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4443         }
4444
4445         /*
4446         ** Setup for RX MultiQueue
4447         */
4448         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4449         if (adapter->num_queues > 1) {
4450                 u32 random[10], mrqc, shift = 0;
4451                 union igb_reta {
4452                         u32 dword;
4453                         u8  bytes[4];
4454                 } reta;
4455
4456                 arc4rand(&random, sizeof(random), 0);
4457                 if (adapter->hw.mac.type == e1000_82575)
4458                         shift = 6;
4459                 /* Populate the 128-entry redirection table */
4460                 for (int i = 0; i < 128; i++) {
4461                         reta.bytes[i & 3] =
4462                             (i % adapter->num_queues) << shift;
4463                         if ((i & 3) == 3)
4464                                 E1000_WRITE_REG(hw,
4465                                     E1000_RETA(i >> 2), reta.dword);
4466                 }
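                /*
                 * RETA packing, by way of example: 128 one-byte
                 * entries are written four at a time through 32
                 * 32-bit RETA registers.  With num_queues = 4 the
                 * bytes cycle 0,1,2,3, so on a little-endian host
                 * each reta.dword written is 0x03020100.  The 82575
                 * wants the queue number in the upper bits of each
                 * entry, hence the shift of 6 there.
                 */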
4467                 /* Now fill in the RSS random key */
4468                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4469                 for (int i = 0; i < 10; i++)
4470                         E1000_WRITE_REG_ARRAY(hw,
4471                             E1000_RSSRK(0), i, random[i]);
4472
4473                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4474                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4475                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4476                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4477                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4478                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4479                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4480                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4481
4482                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4483
4484                 /*
4485                 ** NOTE: Receive Full-Packet Checksum Offload
4486                 ** is mutually exclusive with Multiqueue. This
4487                 ** is not the same as TCP/IP checksum offload,
4488                 ** which still works.
4489                 */
4490                 rxcsum |= E1000_RXCSUM_PCSD;
4491 #if __FreeBSD_version >= 800000
4492                 /* For SCTP Offload */
4493                 if ((hw->mac.type == e1000_82576)
4494                     && (ifp->if_capenable & IFCAP_RXCSUM))
4495                         rxcsum |= E1000_RXCSUM_CRCOFL;
4496 #endif
4497         } else {
4498                 /* Non RSS setup */
4499                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4500                         rxcsum |= E1000_RXCSUM_IPPCSE;
4501 #if __FreeBSD_version >= 800000
4502                         if (adapter->hw.mac.type == e1000_82576)
4503                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4504 #endif
4505                 } else
4506                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4507         }
4508         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4509
4510         /* Setup the Receive Control Register */
4511         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4512         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4513                    E1000_RCTL_RDMTS_HALF |
4514                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4515         /* Strip CRC bytes. */
4516         rctl |= E1000_RCTL_SECRC;
4517         /* Make sure VLAN Filters are off */
4518         rctl &= ~E1000_RCTL_VFE;
4519         /* Don't store bad packets */
4520         rctl &= ~E1000_RCTL_SBP;
4521
4522         /* Enable Receives */
4523         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4524
4525         /*
4526          * Setup the HW Rx Head and Tail Descriptor Pointers
4527          *   - needs to be after enable
4528          */
4529         for (int i = 0; i < adapter->num_queues; i++) {
4530                 rxr = &adapter->rx_rings[i];
4531                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4532 #ifdef DEV_NETMAP
4533                 /*
4534                  * An init() while a netmap client is active must
4535                  * preserve the rx buffers passed to userspace.
4536                  * In this driver it means we adjust RDT to
4537                  * something different from next_to_refresh
4538                  * (which is not used in netmap mode).
4539                  */
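                /*
                 * For example, with 1024 descriptors,
                 * next_to_refresh = 10 and nr_hwavail = 50 give
                 * t = -40, which wraps to RDT = 984.
                 */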
4540                 if (ifp->if_capenable & IFCAP_NETMAP) {
4541                         struct netmap_adapter *na = NA(adapter->ifp);
4542                         struct netmap_kring *kring = &na->rx_rings[i];
4543                         int t = rxr->next_to_refresh - kring->nr_hwavail;
4544
4545                         if (t >= adapter->num_rx_desc)
4546                                 t -= adapter->num_rx_desc;
4547                         else if (t < 0)
4548                                 t += adapter->num_rx_desc;
4549                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4550                 } else
4551 #endif /* DEV_NETMAP */
4552                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4553         }
4554         return;
4555 }
4556
4557 /*********************************************************************
4558  *
4559  *  Free receive rings.
4560  *
4561  **********************************************************************/
4562 static void
4563 igb_free_receive_structures(struct adapter *adapter)
4564 {
4565         struct rx_ring *rxr = adapter->rx_rings;
4566
4567         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4568                 struct lro_ctrl *lro = &rxr->lro;
4569                 igb_free_receive_buffers(rxr);
4570                 tcp_lro_free(lro);
4571                 igb_dma_free(adapter, &rxr->rxdma);
4572         }
4573
4574         free(adapter->rx_rings, M_DEVBUF);
4575 }
4576
4577 /*********************************************************************
4578  *
4579  *  Free receive ring data structures.
4580  *
4581  **********************************************************************/
4582 static void
4583 igb_free_receive_buffers(struct rx_ring *rxr)
4584 {
4585         struct adapter          *adapter = rxr->adapter;
4586         struct igb_rx_buf       *rxbuf;
4587         int i;
4588
4589         INIT_DEBUGOUT("free_receive_structures: begin");
4590
4591         /* Cleanup any existing buffers */
4592         if (rxr->rx_buffers != NULL) {
4593                 for (i = 0; i < adapter->num_rx_desc; i++) {
4594                         rxbuf = &rxr->rx_buffers[i];
4595                         if (rxbuf->m_head != NULL) {
4596                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4597                                     BUS_DMASYNC_POSTREAD);
4598                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4599                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4600                                 m_freem(rxbuf->m_head);
4601                         }
4602                         if (rxbuf->m_pack != NULL) {
4603                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4604                                     BUS_DMASYNC_POSTREAD);
4605                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4606                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4607                                 m_freem(rxbuf->m_pack);
4608                         }
4609                         rxbuf->m_head = NULL;
4610                         rxbuf->m_pack = NULL;
4611                         if (rxbuf->hmap != NULL) {
4612                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4613                                 rxbuf->hmap = NULL;
4614                         }
4615                         if (rxbuf->pmap != NULL) {
4616                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4617                                 rxbuf->pmap = NULL;
4618                         }
4619                 }
4620                 if (rxr->rx_buffers != NULL) {
4621                         free(rxr->rx_buffers, M_DEVBUF);
4622                         rxr->rx_buffers = NULL;
4623                 }
4624         }
4625
4626         if (rxr->htag != NULL) {
4627                 bus_dma_tag_destroy(rxr->htag);
4628                 rxr->htag = NULL;
4629         }
4630         if (rxr->ptag != NULL) {
4631                 bus_dma_tag_destroy(rxr->ptag);
4632                 rxr->ptag = NULL;
4633         }
4634 }
4635
4636 static __inline void
4637 igb_rx_discard(struct rx_ring *rxr, int i)
4638 {
4639         struct igb_rx_buf       *rbuf;
4640
4641         rbuf = &rxr->rx_buffers[i];
4642
4643         /* Partially received? Free the chain */
4644         if (rxr->fmp != NULL) {
4645                 rxr->fmp->m_flags |= M_PKTHDR;
4646                 m_freem(rxr->fmp);
4647                 rxr->fmp = NULL;
4648                 rxr->lmp = NULL;
4649         }
4650
4651         /*
4652         ** With advanced descriptors the writeback
4653         ** clobbers the buffer addrs, so it's easier
4654         ** to just free the existing mbufs and take
4655         ** the normal refresh path to get new buffers
4656         ** and mapping.
4657         */
4658         if (rbuf->m_head) {
4659                 m_free(rbuf->m_head);
4660                 rbuf->m_head = NULL;
4661         }
4662
4663         if (rbuf->m_pack) {
4664                 m_free(rbuf->m_pack);
4665                 rbuf->m_pack = NULL;
4666         }
4667
4668         return;
4669 }
4670
4671 static __inline void
4672 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4673 {
4674
4675         /*
4676          * At the moment LRO is only done for IPv4/TCP packets whose TCP
4677          * checksum has been verified by hardware and which carry no VLAN
4678          * tag in the ethernet header.
4679          */
4680         if (rxr->lro_enabled &&
4681             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4682             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4683             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4684             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4685             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4686             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4687                 /*
4688                  * Send to the stack if:
4689                  *  - LRO not enabled, or
4690                  *  - no LRO resources, or
4691                  *  - lro enqueue fails
4692                  */
4693                 if (rxr->lro.lro_cnt != 0)
4694                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4695                                 return;
4696         }
4697         IGB_RX_UNLOCK(rxr);
4698         (*ifp->if_input)(ifp, m);
4699         IGB_RX_LOCK(rxr);
4700 }
4701
4702 /*********************************************************************
4703  *
4704  *  This routine executes in interrupt context. It replenishes
4705  *  the mbufs in the descriptor ring and passes data which has
4706  *  been DMA'd into host memory up to the upper layer.
4707  *
4708  *  We loop at most count times if count is > 0, or until done if
4709  *  count < 0.
4710  *
4711  *  Return TRUE if more to clean, FALSE otherwise
4712  *********************************************************************/
4713 static bool
4714 igb_rxeof(struct igb_queue *que, int count, int *done)
4715 {
4716         struct adapter          *adapter = que->adapter;
4717         struct rx_ring          *rxr = que->rxr;
4718         struct ifnet            *ifp = adapter->ifp;
4719         struct lro_ctrl         *lro = &rxr->lro;
4720         struct lro_entry        *queued;
4721         int                     i, processed = 0, rxdone = 0;
4722         u32                     ptype, staterr = 0;
4723         union e1000_adv_rx_desc *cur;
4724
4725         IGB_RX_LOCK(rxr);
4726         /* Sync the ring. */
4727         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4728             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4729
4730 #ifdef DEV_NETMAP
4731         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4732                 return (FALSE);
4733 #endif /* DEV_NETMAP */
4734
4735         /* Main clean loop */
4736         for (i = rxr->next_to_check; count != 0;) {
4737                 struct mbuf             *sendmp, *mh, *mp;
4738                 struct igb_rx_buf       *rxbuf;
4739                 u16                     hlen, plen, hdr, vtag;
4740                 bool                    eop = FALSE;
4741  
4742                 cur = &rxr->rx_base[i];
4743                 staterr = le32toh(cur->wb.upper.status_error);
4744                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4745                         break;
4746                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4747                         break;
4748                 count--;
4749                 sendmp = mh = mp = NULL;
4750                 cur->wb.upper.status_error = 0;
4751                 rxbuf = &rxr->rx_buffers[i];
4752                 plen = le16toh(cur->wb.upper.length);
4753                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4754                 if ((adapter->hw.mac.type == e1000_i350) &&
4755                     (staterr & E1000_RXDEXT_STATERR_LB))
4756                         vtag = be16toh(cur->wb.upper.vlan);
4757                 else
4758                         vtag = le16toh(cur->wb.upper.vlan);
4759                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4760                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4761
4762                 /* Make sure all segments of a bad packet are discarded */
4763                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4764                     (rxr->discard)) {
4765                         adapter->dropped_pkts++;
4766                         ++rxr->rx_discarded;
4767                         if (!eop) /* Catch subsequent segs */
4768                                 rxr->discard = TRUE;
4769                         else
4770                                 rxr->discard = FALSE;
4771                         igb_rx_discard(rxr, i);
4772                         goto next_desc;
4773                 }
4774
4775                 /*
4776                 ** The way the hardware is configured to
4777                 ** split, it will ONLY use the header buffer
4778                 ** when header split is enabled, otherwise we
4779                 ** get normal behavior, ie, both header and
4780                 ** payload are DMA'd into the payload buffer.
4781                 **
4782                 ** The fmp test is to catch the case where a
4783                 ** packet spans multiple descriptors, in that
4784                 ** case only the first header is valid.
4785                 */
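                /*
                 * The header length is decoded from the hdr_info
                 * field of the writeback descriptor and clamped to
                 * IGB_HDR_BUF, since that is all the header buffer
                 * was sized for when the ring was set up.
                 */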
4786                 if (rxr->hdr_split && rxr->fmp == NULL) {
4787                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4788                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4789                         if (hlen > IGB_HDR_BUF)
4790                                 hlen = IGB_HDR_BUF;
4791                         mh = rxr->rx_buffers[i].m_head;
4792                         mh->m_len = hlen;
4793                         /* clear buf pointer for refresh */
4794                         rxbuf->m_head = NULL;
4795                         /*
4796                         ** Get the payload length, this
4797                         ** could be zero if it's a small
4798                         ** packet.
4799                         */
4800                         if (plen > 0) {
4801                                 mp = rxr->rx_buffers[i].m_pack;
4802                                 mp->m_len = plen;
4803                                 mh->m_next = mp;
4804                                 /* clear buf pointer */
4805                                 rxbuf->m_pack = NULL;
4806                                 rxr->rx_split_packets++;
4807                         }
4808                 } else {
4809                         /*
4810                         ** Either no header split, or a
4811                         ** secondary piece of a fragmented
4812                         ** split packet.
4813                         */
4814                         mh = rxr->rx_buffers[i].m_pack;
4815                         mh->m_len = plen;
4816                         /* clear buf info for refresh */
4817                         rxbuf->m_pack = NULL;
4818                 }
4819
4820                 ++processed; /* So we know when to refresh */
4821
4822                 /* Initial frame - setup */
4823                 if (rxr->fmp == NULL) {
4824                         mh->m_pkthdr.len = mh->m_len;
4825                         /* Save the head of the chain */
4826                         rxr->fmp = mh;
4827                         rxr->lmp = mh;
4828                         if (mp != NULL) {
4829                                 /* Add payload if split */
4830                                 mh->m_pkthdr.len += mp->m_len;
4831                                 rxr->lmp = mh->m_next;
4832                         }
4833                 } else {
4834                         /* Chain mbuf's together */
4835                         rxr->lmp->m_next = mh;
4836                         rxr->lmp = rxr->lmp->m_next;
4837                         rxr->fmp->m_pkthdr.len += mh->m_len;
4838                 }
4839
4840                 if (eop) {
4841                         rxr->fmp->m_pkthdr.rcvif = ifp;
4842                         ifp->if_ipackets++;
4843                         rxr->rx_packets++;
4844                         /* capture data for AIM */
4845                         rxr->packets++;
4846                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4847                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4848
4849                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4850                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4851
4852                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4853                             (staterr & E1000_RXD_STAT_VP) != 0) {
4854                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4855                                 rxr->fmp->m_flags |= M_VLANTAG;
4856                         }
4857 #if __FreeBSD_version >= 800000
4858                         rxr->fmp->m_pkthdr.flowid = que->msix;
4859                         rxr->fmp->m_flags |= M_FLOWID;
4860 #endif
4861                         sendmp = rxr->fmp;
4862                         /* Make sure to set M_PKTHDR. */
4863                         sendmp->m_flags |= M_PKTHDR;
4864                         rxr->fmp = NULL;
4865                         rxr->lmp = NULL;
4866                 }
4867
4868 next_desc:
4869                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4870                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4871
4872                 /* Advance our pointers to the next descriptor. */
4873                 if (++i == adapter->num_rx_desc)
4874                         i = 0;
4875                 /*
4876                 ** Send to the stack or LRO
4877                 */
4878                 if (sendmp != NULL) {
4879                         rxr->next_to_check = i;
4880                         igb_rx_input(rxr, ifp, sendmp, ptype);
4881                         i = rxr->next_to_check;
4882                         rxdone++;
4883                 }
4884
4885                 /* Every 8 descriptors we go to refresh mbufs */
4886                 if (processed == 8) {
4887                         igb_refresh_mbufs(rxr, i);
4888                         processed = 0;
4889                 }
4890         }
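
        /*
         * Refreshing in batches of eight presumably amortizes the
         * cost of the RDT tail write over several descriptors.
         */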
4891
4892         /* Catch any remainders */
4893         if (igb_rx_unrefreshed(rxr))
4894                 igb_refresh_mbufs(rxr, i);
4895
4896         rxr->next_to_check = i;
4897
4898         /*
4899          * Flush any outstanding LRO work
4900          */
4901         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4902                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4903                 tcp_lro_flush(lro, queued);
4904         }
4905
4906         if (done != NULL)
4907                 *done += rxdone;
4908
4909         IGB_RX_UNLOCK(rxr);
4910         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4911 }
4912
4913 /*********************************************************************
4914  *
4915  *  Verify that the hardware indicated that the checksum is valid.
4916  *  Inform the stack about the checksum status so that the stack
4917  *  doesn't spend time verifying it again.
4918  *
4919  *********************************************************************/
4920 static void
4921 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4922 {
4923         u16 status = (u16)staterr;
4924         u8  errors = (u8) (staterr >> 24);
4925         int sctp;
4926
4927         /* Ignore Checksum bit is set */
4928         if (status & E1000_RXD_STAT_IXSM) {
4929                 mp->m_pkthdr.csum_flags = 0;
4930                 return;
4931         }
4932
4933         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4934             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4935                 sctp = 1;
4936         else
4937                 sctp = 0;
4938         if (status & E1000_RXD_STAT_IPCS) {
4939                 /* Did it pass? */
4940                 if (!(errors & E1000_RXD_ERR_IPE)) {
4941                         /* IP Checksum Good */
4942                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4943                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4944                 } else
4945                         mp->m_pkthdr.csum_flags = 0;
4946         }
4947
4948         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4949                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4950 #if __FreeBSD_version >= 800000
4951                 if (sctp) /* reassign */
4952                         type = CSUM_SCTP_VALID;
4953 #endif
4954                 /* Did it pass? */
4955                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4956                         mp->m_pkthdr.csum_flags |= type;
4957                         if (sctp == 0)
4958                                 mp->m_pkthdr.csum_data = htons(0xffff);
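                        /*
                         * csum_data of 0xffff plus CSUM_DATA_VALID |
                         * CSUM_PSEUDO_HDR tells the stack the full
                         * checksum, pseudo-header included, has been
                         * verified, so no software check is needed.
                         */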
4959                 }
4960         }
4961         return;
4962 }
4963
4964 /*
4965  * This routine is run via a vlan
4966  * config EVENT
4967  */
4968 static void
4969 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4970 {
4971         struct adapter  *adapter = ifp->if_softc;
4972         u32             index, bit;
4973
4974         if (ifp->if_softc !=  arg)   /* Not our event */
4975                 return;
4976
4977         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4978                 return;
4979
4980         IGB_CORE_LOCK(adapter);
4981         index = (vtag >> 5) & 0x7F;
4982         bit = vtag & 0x1F;
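        /*
         * Example: vtag 1000 gives index = (1000 >> 5) & 0x7F = 31
         * and bit = 1000 & 0x1F = 8, i.e. bit 8 of dword 31 in the
         * 128-dword shadow VFTA.
         */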
4983         adapter->shadow_vfta[index] |= (1 << bit);
4984         ++adapter->num_vlans;
4985         /* Change hw filter setting */
4986         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4987                 igb_setup_vlan_hw_support(adapter);
4988         IGB_CORE_UNLOCK(adapter);
4989 }
4990
4991 /*
4992  * This routine is run via a vlan
4993  * unconfig EVENT
4994  */
4995 static void
4996 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4997 {
4998         struct adapter  *adapter = ifp->if_softc;
4999         u32             index, bit;
5000
5001         if (ifp->if_softc !=  arg)
5002                 return;
5003
5004         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5005                 return;
5006
5007         IGB_CORE_LOCK(adapter);
5008         index = (vtag >> 5) & 0x7F;
5009         bit = vtag & 0x1F;
5010         adapter->shadow_vfta[index] &= ~(1 << bit);
5011         --adapter->num_vlans;
5012         /* Change hw filter setting */
5013         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5014                 igb_setup_vlan_hw_support(adapter);
5015         IGB_CORE_UNLOCK(adapter);
5016 }
5017
5018 static void
5019 igb_setup_vlan_hw_support(struct adapter *adapter)
5020 {
5021         struct e1000_hw *hw = &adapter->hw;
5022         struct ifnet    *ifp = adapter->ifp;
5023         u32             reg;
5024
5025         if (adapter->vf_ifp) {
5026                 e1000_rlpml_set_vf(hw,
5027                     adapter->max_frame_size + VLAN_TAG_SIZE);
5028                 return;
5029         }
5030
5031         reg = E1000_READ_REG(hw, E1000_CTRL);
5032         reg |= E1000_CTRL_VME;
5033         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5034
5035         /* Enable the Filter Table */
5036         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5037                 reg = E1000_READ_REG(hw, E1000_RCTL);
5038                 reg &= ~E1000_RCTL_CFIEN;
5039                 reg |= E1000_RCTL_VFE;
5040                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5041         }
5042
5043         /* Update the frame size */
5044         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5045             adapter->max_frame_size + VLAN_TAG_SIZE);
5046
5047         /* Don't bother with table if no vlans */
5048         if ((adapter->num_vlans == 0) ||
5049             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5050                 return;
5051         /*
5052         ** A soft reset zeroes out the VFTA, so
5053         ** we need to repopulate it now.
5054         */
5055         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5056                 if (adapter->shadow_vfta[i] != 0) {
5057                         if (adapter->vf_ifp)
5058                                 e1000_vfta_set_vf(hw,
5059                                     adapter->shadow_vfta[i], TRUE);
5060                         else
5061                                 e1000_write_vfta(hw,
5062                                     i, adapter->shadow_vfta[i]);
5063                 }
5064 }
5065
5066 static void
5067 igb_enable_intr(struct adapter *adapter)
5068 {
5069         /* With RSS set up what to auto clear */
5070         if (adapter->msix_mem) {
5071                 u32 mask = (adapter->que_mask | adapter->link_mask);
5072                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5073                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5074                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5075                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5076                     E1000_IMS_LSC);
5077         } else {
5078                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5079                     IMS_ENABLE_MASK);
5080         }
5081         E1000_WRITE_FLUSH(&adapter->hw);
5082
5083         return;
5084 }
5085
5086 static void
5087 igb_disable_intr(struct adapter *adapter)
5088 {
5089         if (adapter->msix_mem) {
5090                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5091                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5092         } 
5093         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5094         E1000_WRITE_FLUSH(&adapter->hw);
5095         return;
5096 }
5097
5098 /*
5099  * Bit of a misnomer: what this really means is
5100  * to enable OS management of the system, i.e. to
5101  * disable special hardware management features.
5102  */
5103 static void
5104 igb_init_manageability(struct adapter *adapter)
5105 {
5106         if (adapter->has_manage) {
5107                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5108                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5109
5110                 /* disable hardware interception of ARP */
5111                 manc &= ~(E1000_MANC_ARP_EN);
5112
5113                 /* enable receiving management packets to the host */
5114                 manc |= E1000_MANC_EN_MNG2HOST;
5115                 manc2h |= 1 << 5;  /* Mng Port 623 */
5116                 manc2h |= 1 << 6;  /* Mng Port 664 */
5117                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5118                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5119         }
5120 }
5121
5122 /*
5123  * Give control back to hardware management
5124  * controller if there is one.
5125  */
5126 static void
5127 igb_release_manageability(struct adapter *adapter)
5128 {
5129         if (adapter->has_manage) {
5130                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5131
5132                 /* re-enable hardware interception of ARP */
5133                 manc |= E1000_MANC_ARP_EN;
5134                 manc &= ~E1000_MANC_EN_MNG2HOST;
5135
5136                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5137         }
5138 }
5139
5140 /*
5141  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5142  * For ASF and Pass Through versions of f/w this means that
5143  * the driver is loaded. 
5144  *
5145  */
5146 static void
5147 igb_get_hw_control(struct adapter *adapter)
5148 {
5149         u32 ctrl_ext;
5150
5151         if (adapter->vf_ifp)
5152                 return;
5153
5154         /* Let firmware know the driver has taken over */
5155         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5156         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5157             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5158 }
5159
5160 /*
5161  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5162  * For ASF and Pass Through versions of f/w this means that the
5163  * driver is no longer loaded.
5164  *
5165  */
5166 static void
5167 igb_release_hw_control(struct adapter *adapter)
5168 {
5169         u32 ctrl_ext;
5170
5171         if (adapter->vf_ifp)
5172                 return;
5173
5174         /* Let firmware take over control of h/w */
5175         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5176         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5177             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5178 }
5179
5180 static int
5181 igb_is_valid_ether_addr(uint8_t *addr)
5182 {
5183         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5184
5185         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5186                 return (FALSE);
5187         }
5188
5189         return (TRUE);
5190 }
5191
5192
5193 /*
5194  * Enable PCI Wake On Lan capability
5195  */
5196 static void
5197 igb_enable_wakeup(device_t dev)
5198 {
5199         u16     cap, status;
5200         u8      id;
5201
5202         /* First find the capabilities pointer */
5203         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5204         /* Read the PM Capabilities */
5205         id = pci_read_config(dev, cap, 1);
5206         if (id != PCIY_PMG)     /* Something wrong */
5207                 return;
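        /*
         * Note that this assumes the power-management capability is
         * the first entry in the device's capability list; a more
         * general version would walk the list following each
         * capability's next pointer.
         */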
5208         /* OK, we have the power capabilities, so
5209            now get the status register */
5210         cap += PCIR_POWER_STATUS;
5211         status = pci_read_config(dev, cap, 2);
5212         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5213         pci_write_config(dev, cap, status, 2);
5214         return;
5215 }
5216
5217 static void
5218 igb_led_func(void *arg, int onoff)
5219 {
5220         struct adapter  *adapter = arg;
5221
5222         IGB_CORE_LOCK(adapter);
5223         if (onoff) {
5224                 e1000_setup_led(&adapter->hw);
5225                 e1000_led_on(&adapter->hw);
5226         } else {
5227                 e1000_led_off(&adapter->hw);
5228                 e1000_cleanup_led(&adapter->hw);
5229         }
5230         IGB_CORE_UNLOCK(adapter);
5231 }
5232
5233 /**********************************************************************
5234  *
5235  *  Update the board statistics counters.
5236  *
5237  **********************************************************************/
5238 static void
5239 igb_update_stats_counters(struct adapter *adapter)
5240 {
5241         struct ifnet            *ifp;
5242         struct e1000_hw         *hw = &adapter->hw;
5243         struct e1000_hw_stats   *stats;
5244
5245         /* 
5246         ** The virtual function adapter has only a
5247         ** small, controlled set of stats, so update
5248         ** only those and return.
5249         */
5250         if (adapter->vf_ifp) {
5251                 igb_update_vf_stats_counters(adapter);
5252                 return;
5253         }
5254
5255         stats = (struct e1000_hw_stats  *)adapter->stats;
5256
5257         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5258            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5259                 stats->symerrs +=
5260                     E1000_READ_REG(hw, E1000_SYMERRS);
5261                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5262         }
5263
5264         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5265         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5266         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5267         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5268
5269         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5270         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5271         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5272         stats->dc += E1000_READ_REG(hw, E1000_DC);
5273         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5274         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5275         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5276         /*
5277         ** For watchdog management we need to know if we have been
5278         ** paused during the last interval, so capture that here.
5279         */ 
5280         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5281         stats->xoffrxc += adapter->pause_frames;
5282         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5283         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5284         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5285         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5286         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5287         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5288         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5289         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5290         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5291         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5292         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5293         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5294
5295         /* For the 64-bit byte counters the low dword must be read first. */
5296         /* Both registers clear on the read of the high dword */
5297
5298         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5299             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5300         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5301             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5302
5303         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5304         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5305         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5306         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5307         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5308
5309         stats->tor += E1000_READ_REG(hw, E1000_TORL) + ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5310         stats->tot += E1000_READ_REG(hw, E1000_TOTL) + ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5311
5312         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5313         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5314         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5315         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5316         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5317         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5318         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5319         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5320         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5321         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5322
5323         /* Interrupt Counts */
5324
5325         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5326         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5327         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5328         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5329         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5330         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5331         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5332         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5333         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5334
5335         /* Host to Card Statistics */
5336
5337         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5338         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5339         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5340         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5341         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5342         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5343         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5344         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5345             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5346         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5347             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5348         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5349         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5350         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5351
5352         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5353         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5354         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5355         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5356         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5357         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5358
5359         ifp = adapter->ifp;
5360         ifp->if_collisions = stats->colc;
5361
5362         /* Rx Errors */
5363         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5364             stats->crcerrs + stats->algnerrc +
5365             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5366
5367         /* Tx Errors */
5368         ifp->if_oerrors = stats->ecol +
5369             stats->latecol + adapter->watchdog_events;
5370
5371         /* Driver specific counters */
5372         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5373         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5374         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5375         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5376         adapter->packet_buf_alloc_tx =
5377             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5378         adapter->packet_buf_alloc_rx =
5379             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5380 }
5381
5382
5383 /**********************************************************************
5384  *
5385  *  Initialize the VF board statistics counters.
5386  *
5387  **********************************************************************/
5388 static void
5389 igb_vf_init_stats(struct adapter *adapter)
5390 {
5391         struct e1000_hw *hw = &adapter->hw;
5392         struct e1000_vf_stats   *stats;
5393
5394         stats = (struct e1000_vf_stats  *)adapter->stats;
5395         if (stats == NULL)
5396                 return;
5397         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5398         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5399         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5400         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5401         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5402 }
5403  
5404 /**********************************************************************
5405  *
5406  *  Update the VF board statistics counters.
5407  *
5408  **********************************************************************/
5409 static void
5410 igb_update_vf_stats_counters(struct adapter *adapter)
5411 {
5412         struct e1000_hw *hw = &adapter->hw;
5413         struct e1000_vf_stats   *stats;
5414
5415         if (adapter->link_speed == 0)
5416                 return;
5417
5418         stats = (struct e1000_vf_stats  *)adapter->stats;
5419
5420         UPDATE_VF_REG(E1000_VFGPRC,
5421             stats->last_gprc, stats->gprc);
5422         UPDATE_VF_REG(E1000_VFGORC,
5423             stats->last_gorc, stats->gorc);
5424         UPDATE_VF_REG(E1000_VFGPTC,
5425             stats->last_gptc, stats->gptc);
5426         UPDATE_VF_REG(E1000_VFGOTC,
5427             stats->last_gotc, stats->gotc);
5428         UPDATE_VF_REG(E1000_VFMPRC,
5429             stats->last_mprc, stats->mprc);
5430 }
5431
5432 /* Export a single 32-bit register via a read-only sysctl. */
5433 static int
5434 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5435 {
5436         struct adapter *adapter;
5437         u_int val;
5438
5439         adapter = oidp->oid_arg1;
5440         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5441         return (sysctl_handle_int(oidp, &val, 0, req));
5442 }
5443
5444 /*
5445 **  Tunable interrupt rate handler
5446 */
5447 static int
5448 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5449 {
5450         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5451         int                     error;
5452         u32                     reg, usec, rate;
5453                         
5454         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5455         usec = ((reg & 0x7FFC) >> 2);
5456         if (usec > 0)
5457                 rate = 1000000 / usec;
5458         else
5459                 rate = 0;
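        /* e.g. an interval of 125 usec gives a rate of 8000 ints/sec */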
5460         error = sysctl_handle_int(oidp, &rate, 0, req);
5461         if (error || !req->newptr)
5462                 return (error);
5463         return (0);
5464 }
5465
5466 /*
5467  * Add sysctl variables, one per statistic, to the system.
5468  */
5469 static void
5470 igb_add_hw_stats(struct adapter *adapter)
5471 {
5472         device_t dev = adapter->dev;
5473
5474         struct tx_ring *txr = adapter->tx_rings;
5475         struct rx_ring *rxr = adapter->rx_rings;
5476
5477         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5478         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5479         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5480         struct e1000_hw_stats *stats = adapter->stats;
5481
5482         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5483         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5484
5485 #define QUEUE_NAME_LEN 32
5486         char namebuf[QUEUE_NAME_LEN];
5487
5488         /* Driver Statistics */
5489         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5490                         CTLFLAG_RD, &adapter->link_irq, 0,
5491                         "Link MSIX IRQ Handled");
5492         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5493                         CTLFLAG_RD, &adapter->dropped_pkts,
5494                         "Driver dropped packets");
5495         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5496                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5497                         "Driver tx dma failure in xmit");
5498         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5499                         CTLFLAG_RD, &adapter->rx_overruns,
5500                         "RX overruns");
5501         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5502                         CTLFLAG_RD, &adapter->watchdog_events,
5503                         "Watchdog timeouts");
5504
5505         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5506                         CTLFLAG_RD, &adapter->device_control,
5507                         "Device Control Register");
5508         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5509                         CTLFLAG_RD, &adapter->rx_control,
5510                         "Receiver Control Register");
5511         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5512                         CTLFLAG_RD, &adapter->int_mask,
5513                         "Interrupt Mask");
5514         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5515                         CTLFLAG_RD, &adapter->eint_mask,
5516                         "Extended Interrupt Mask");
5517         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5518                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5519                         "Transmit Buffer Packet Allocation");
5520         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5521                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5522                         "Receive Buffer Packet Allocation");
5523         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5524                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5525                         "Flow Control High Watermark");
5526         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5527                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5528                         "Flow Control Low Watermark");
5529
5530         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5531                 struct lro_ctrl *lro = &rxr->lro;
5532
5533                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5534                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5535                                             CTLFLAG_RD, NULL, "Queue Name");
5536                 queue_list = SYSCTL_CHILDREN(queue_node);
5537
5538                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5539                                 CTLFLAG_RD, &adapter->queues[i],
5540                                 sizeof(adapter->queues[i]),
5541                                 igb_sysctl_interrupt_rate_handler,
5542                                 "IU", "Interrupt Rate");
5543
5544                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5545                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5546                                 igb_sysctl_reg_handler, "IU",
5547                                 "Transmit Descriptor Head");
5548                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5549                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5550                                 igb_sysctl_reg_handler, "IU",
5551                                 "Transmit Descriptor Tail");
5552                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5553                                 CTLFLAG_RD, &txr->no_desc_avail,
5554                                 "Queue No Descriptor Available");
5555                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5556                                 CTLFLAG_RD, &txr->tx_packets,
5557                                 "Queue Packets Transmitted");
5558
5559                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5560                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5561                                 igb_sysctl_reg_handler, "IU",
5562                                 "Receive Descriptor Head");
5563                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5564                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5565                                 igb_sysctl_reg_handler, "IU",
5566                                 "Receive Descriptor Tail");
5567                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5568                                 CTLFLAG_RD, &rxr->rx_packets,
5569                                 "Queue Packets Received");
5570                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5571                                 CTLFLAG_RD, &rxr->rx_bytes,
5572                                 "Queue Bytes Received");
5573                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5574                                 CTLFLAG_RD, &lro->lro_queued, 0,
5575                                 "LRO Queued");
5576                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5577                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5578                                 "LRO Flushed");
5579         }
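        /*
         * The loop above produces one sysctl node per queue; on a
         * hypothetical first unit the tree looks like:
         *
         *	dev.igb.0.queue0.interrupt_rate
         *	dev.igb.0.queue0.txd_head
         *	dev.igb.0.queue0.rxd_tail
         *	dev.igb.0.queue0.rx_packets
         */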
5580
5581         /* MAC stats get their own sub node */
5582
5583         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5584                                     CTLFLAG_RD, NULL, "MAC Statistics");
5585         stat_list = SYSCTL_CHILDREN(stat_node);
5586
5587         /*
5588         ** The VF adapter has a very limited set of stats
5589         ** since it's not managing the metal, so to speak.
5590         */
5591         if (adapter->vf_ifp) {
5592                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5593                                 CTLFLAG_RD, &stats->gprc,
5594                                 "Good Packets Received");
5595                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5596                                 CTLFLAG_RD, &stats->gptc,
5597                                 "Good Packets Transmitted");
5598                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5599                                 CTLFLAG_RD, &stats->gorc,
5600                                 "Good Octets Received");
5601                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5602                                 CTLFLAG_RD, &stats->gotc,
5603                                 "Good Octets Transmitted");
5604                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5605                                 CTLFLAG_RD, &stats->mprc,
5606                                 "Multicast Packets Received");
5607                 return;
5608         }
5609
5610         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5611                         CTLFLAG_RD, &stats->ecol,
5612                         "Excessive collisions");
5613         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5614                         CTLFLAG_RD, &stats->scc,
5615                         "Single collisions");
5616         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5617                         CTLFLAG_RD, &stats->mcc,
5618                         "Multiple collisions");
5619         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5620                         CTLFLAG_RD, &stats->latecol,
5621                         "Late collisions");
5622         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5623                         CTLFLAG_RD, &stats->colc,
5624                         "Collision Count");
5625         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5626                         CTLFLAG_RD, &stats->symerrs,
5627                         "Symbol Errors");
5628         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5629                         CTLFLAG_RD, &stats->sec,
5630                         "Sequence Errors");
5631         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5632                         CTLFLAG_RD, &stats->dc,
5633                         "Defer Count");
5634         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5635                         CTLFLAG_RD, &stats->mpc,
5636                         "Missed Packets");
5637         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5638                         CTLFLAG_RD, &stats->rnbc,
5639                         "Receive No Buffers");
5640         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5641                         CTLFLAG_RD, &stats->ruc,
5642                         "Receive Undersize");
5643         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5644                         CTLFLAG_RD, &stats->rfc,
5645                         "Fragmented Packets Received");
5646         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5647                         CTLFLAG_RD, &stats->roc,
5648                         "Oversized Packets Received");
5649         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5650                         CTLFLAG_RD, &stats->rjc,
5651                         "Received Jabber");
5652         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5653                         CTLFLAG_RD, &stats->rxerrc,
5654                         "Receive Errors");
5655         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5656                         CTLFLAG_RD, &stats->crcerrs,
5657                         "CRC errors");
5658         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5659                         CTLFLAG_RD, &stats->algnerrc,
5660                         "Alignment Errors");
5661         /* On 82575 these are collision counts */
5662         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5663                         CTLFLAG_RD, &stats->cexterr,
5664                         "Collision/Carrier extension errors");
5665         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5666                         CTLFLAG_RD, &stats->xonrxc,
5667                         "XON Received");
5668         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5669                         CTLFLAG_RD, &stats->xontxc,
5670                         "XON Transmitted");
5671         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5672                         CTLFLAG_RD, &stats->xoffrxc,
5673                         "XOFF Received");
5674         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5675                         CTLFLAG_RD, &stats->xofftxc,
5676                         "XOFF Transmitted");
5677         /* Packet Reception Stats */
5678         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5679                         CTLFLAG_RD, &stats->tpr,
5680                         "Total Packets Received");
5681         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5682                         CTLFLAG_RD, &stats->gprc,
5683                         "Good Packets Received");
5684         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5685                         CTLFLAG_RD, &stats->bprc,
5686                         "Broadcast Packets Received");
5687         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5688                         CTLFLAG_RD, &stats->mprc,
5689                         "Multicast Packets Received");
5690         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5691                         CTLFLAG_RD, &stats->prc64,
5692                         "64 byte frames received");
5693         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5694                         CTLFLAG_RD, &stats->prc127,
5695                         "65-127 byte frames received");
5696         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5697                         CTLFLAG_RD, &stats->prc255,
5698                         "128-255 byte frames received");
5699         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5700                         CTLFLAG_RD, &stats->prc511,
5701                         "256-511 byte frames received");
5702         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5703                         CTLFLAG_RD, &stats->prc1023,
5704                         "512-1023 byte frames received");
5705         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5706                         CTLFLAG_RD, &stats->prc1522,
5707                         "1024-1522 byte frames received");
5708         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5709                         CTLFLAG_RD, &stats->gorc, 
5710                         "Good Octets Received"); 
5711
5712         /* Packet Transmission Stats */
5713         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5714                         CTLFLAG_RD, &stats->gotc, 
5715                         "Good Octets Transmitted"); 
5716         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5717                         CTLFLAG_RD, &stats->tpt,
5718                         "Total Packets Transmitted");
5719         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5720                         CTLFLAG_RD, &stats->gptc,
5721                         "Good Packets Transmitted");
5722         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5723                         CTLFLAG_RD, &stats->bptc,
5724                         "Broadcast Packets Transmitted");
5725         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5726                         CTLFLAG_RD, &stats->mptc,
5727                         "Multicast Packets Transmitted");
5728         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5729                         CTLFLAG_RD, &stats->ptc64,
5730                         "64 byte frames transmitted");
5731         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5732                         CTLFLAG_RD, &stats->ptc127,
5733                         "65-127 byte frames transmitted");
5734         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5735                         CTLFLAG_RD, &stats->ptc255,
5736                         "128-255 byte frames transmitted");
5737         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5738                         CTLFLAG_RD, &stats->ptc511,
5739                         "256-511 byte frames transmitted");
5740         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5741                         CTLFLAG_RD, &stats->ptc1023,
5742                         "512-1023 byte frames transmitted");
5743         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5744                         CTLFLAG_RD, &stats->ptc1522,
5745                         "1024-1522 byte frames transmitted");
5746         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5747                         CTLFLAG_RD, &stats->tsctc,
5748                         "TSO Contexts Transmitted");
5749         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5750                         CTLFLAG_RD, &stats->tsctfc,
5751                         "TSO Contexts Failed");
5752
5753
5754         /* Interrupt Stats */
5755
5756         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5757                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5758         int_list = SYSCTL_CHILDREN(int_node);
5759
5760         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5761                         CTLFLAG_RD, &stats->iac,
5762                         "Interrupt Assertion Count");
5763
5764         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5765                         CTLFLAG_RD, &stats->icrxptc,
5766                         "Interrupt Cause Rx Pkt Timer Expire Count");
5767
5768         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5769                         CTLFLAG_RD, &stats->icrxatc,
5770                         "Interrupt Cause Rx Abs Timer Expire Count");
5771
5772         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5773                         CTLFLAG_RD, &stats->ictxptc,
5774                         "Interrupt Cause Tx Pkt Timer Expire Count");
5775
5776         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5777                         CTLFLAG_RD, &stats->ictxatc,
5778                         "Interrupt Cause Tx Abs Timer Expire Count");
5779
5780         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5781                         CTLFLAG_RD, &stats->ictxqec,
5782                         "Interrupt Cause Tx Queue Empty Count");
5783
5784         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5785                         CTLFLAG_RD, &stats->ictxqmtc,
5786                         "Interrupt Cause Tx Queue Min Thresh Count");
5787
5788         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5789                         CTLFLAG_RD, &stats->icrxdmtc,
5790                         "Interrupt Cause Rx Desc Min Thresh Count");
5791
5792         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5793                         CTLFLAG_RD, &stats->icrxoc,
5794                         "Interrupt Cause Receiver Overrun Count");
5795
5796         /* Host to Card Stats */
5797
5798         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5799                                     CTLFLAG_RD, NULL, 
5800                                     "Host to Card Statistics");
5801
5802         host_list = SYSCTL_CHILDREN(host_node);
5803
5804         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5805                         CTLFLAG_RD, &stats->cbtmpc,
5806                         "Circuit Breaker Tx Packet Count");
5807
5808         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5809                         CTLFLAG_RD, &stats->htdpmc,
5810                         "Host Transmit Discarded Packets");
5811
5812         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5813                         CTLFLAG_RD, &stats->rpthc,
5814                         "Rx Packets To Host");
5815
5816         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5817                         CTLFLAG_RD, &stats->cbrmpc,
5818                         "Circuit Breaker Rx Packet Count");
5819
5820         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5821                         CTLFLAG_RD, &stats->cbrdpc,
5822                         "Circuit Breaker Rx Dropped Count");
5823
5824         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5825                         CTLFLAG_RD, &stats->hgptc,
5826                         "Host Good Packets Tx Count");
5827
5828         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5829                         CTLFLAG_RD, &stats->htcbdpc,
5830                         "Host Tx Circuit Breaker Dropped Count");
5831
5832         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5833                         CTLFLAG_RD, &stats->hgorc,
5834                         "Host Good Octets Received Count");
5835
5836         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5837                         CTLFLAG_RD, &stats->hgotc,
5838                         "Host Good Octets Transmit Count");
5839
5840         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5841                         CTLFLAG_RD, &stats->lenerrs,
5842                         "Length Errors");
5843
5844         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5845                         CTLFLAG_RD, &stats->scvpc,
5846                         "SerDes/SGMII Code Violation Pkt Count");
5847
5848         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5849                         CTLFLAG_RD, &stats->hrmpc,
5850                         "Header Redirection Missed Packet Count");
5851 }
5852
5853
5854 /**********************************************************************
5855  *
5856  *  This routine provides a way to dump out the adapter eeprom,
5857  *  often a useful debug/service tool. It only dumps the first
5858  *  32 words; the data that matters lives within that extent.
5859  *
5860  **********************************************************************/
5861 static int
5862 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5863 {
5864         struct adapter *adapter;
5865         int error;
5866         int result;
5867
5868         result = -1;
5869         error = sysctl_handle_int(oidp, &result, 0, req);
5870
5871         if (error || !req->newptr)
5872                 return (error);
5873
5874         /*
5875          * This value will cause a hex dump of the
5876          * first 32 16-bit words of the EEPROM to
5877          * the screen.
5878          */
5879         if (result == 1) {
5880                 adapter = (struct adapter *)arg1;
5881                 igb_print_nvm_info(adapter);
5882         }
5883
5884         return (error);
5885 }
5886
5887 static void
5888 igb_print_nvm_info(struct adapter *adapter)
5889 {
5890         u16     eeprom_data;
5891         int     i, j, row = 0;
5892
5893         /* It's a bit crude, but it gets the job done */
5894         printf("\nInterface EEPROM Dump:\n");
5895         printf("Offset\n0x0000  ");
5896         for (i = 0, j = 0; i < 32; i++, j++) {
5897                 if (j == 8) { /* Make the offset block */
5898                         j = 0; ++row;
5899                         printf("\n0x00%x0  ", row);
5900                 }
5901                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5902                 printf("%04x ", eeprom_data);
5903         }
5904         printf("\n");
5905 }
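/*
 * Example of the resulting console output (first two of four rows
 * shown; word values hypothetical):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  1b21 3456 78d3 ffff ffff ffff ffff ffff
 *	0x0010  0008 ffff ffff ffff ffff ffff ffff ffff
 */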
5906
5907 static void
5908 igb_set_sysctl_value(struct adapter *adapter, const char *name,
5909         const char *description, int *limit, int value)
5910 {
5911         *limit = value;
5912         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5913             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5914             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5915 }
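/*
 * A typical use from attach, sketched with an assumed tunable name,
 * limit field, and default value:
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */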
5916
5917 /*
5918 ** Set flow control using sysctl:
5919 ** Flow control values:
5920 **      0 - off
5921 **      1 - rx pause
5922 **      2 - tx pause
5923 **      3 - full
5924 */
5925 static int
5926 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5927 {
5928         int             error;
5929         static int      input = 3; /* default is full */
5930         struct adapter  *adapter = (struct adapter *) arg1;
5931
5932         error = sysctl_handle_int(oidp, &input, 0, req);
5933
5934         if ((error) || (req->newptr == NULL))
5935                 return (error);
5936
5937         switch (input) {
5938                 case e1000_fc_rx_pause:
5939                 case e1000_fc_tx_pause:
5940                 case e1000_fc_full:
5941                 case e1000_fc_none:
5942                         adapter->hw.fc.requested_mode = input;
5943                         adapter->fc = input;
5944                         break;
5945                 default:
5946                         /* Do nothing */
5947                         return (error);
5948         }
5949
5950         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5951         e1000_force_mac_fc(&adapter->hw);
5952         return (error);
5953 }
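/*
 * Usage sketch (the sysctl node name and unit are assumed). Note
 * that the static 'input' retains the last value written rather
 * than reflecting the adapter's current mode on reads:
 *
 *	# sysctl dev.igb.0.fc=3		request full flow control
 *	# sysctl dev.igb.0.fc=0		disable flow control
 */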
5954
5955 /*
5956 ** Manage DMA Coalesce:
5957 ** Control values:
5958 **      0/1 - off/on
5959 **      Legal timer values are:
5960 **      250, 500, and 1000-10000 in increments of 1000
5961 */
5962 static int
5963 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5964 {
5965         struct adapter *adapter = (struct adapter *) arg1;
5966         int             error;
5967
5968         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5969
5970         if ((error) || (req->newptr == NULL))
5971                 return (error);
5972
5973         switch (adapter->dmac) {
5974                 case 0:
5975                         /*Disabling */
5976                         break;
5977                 case 1: /* Just enable and use default */
5978                         adapter->dmac = 1000;
5979                         break;
5980                 case 250:
5981                 case 500:
5982                 case 1000:
5983                 case 2000:
5984                 case 3000:
5985                 case 4000:
5986                 case 5000:
5987                 case 6000:
5988                 case 7000:
5989                 case 8000:
5990                 case 9000:
5991                 case 10000:
5992                         /* Legal values - allow */
5993                         break;
5994                 default:
5995                         /* Do nothing, illegal value */
5996                         adapter->dmac = 0;
5997                         return (error);
5998         }
5999         /* Reinit the interface */
6000         igb_init(adapter);
6001         return (error);
6002 }
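/*
 * Usage sketch (node name assumed); note that any accepted value
 * reinitializes the interface via igb_init():
 *
 *	# sysctl dev.igb.0.dmac=1000	enable, default timer value
 *	# sysctl dev.igb.0.dmac=0	disable DMA coalescing
 */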
6003
6004 /*
6005 ** Manage Energy Efficient Ethernet:
6006 ** Control values:
6007 **     0/1 - enabled/disabled
6008 */
6009 static int
6010 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6011 {
6012         struct adapter  *adapter = (struct adapter *) arg1;
6013         int             error, value;
6014
6015         value = adapter->hw.dev_spec._82575.eee_disable;
6016         error = sysctl_handle_int(oidp, &value, 0, req);
6017         if (error || req->newptr == NULL)
6018                 return (error);
6019         IGB_CORE_LOCK(adapter);
6020         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6021         igb_init_locked(adapter);
6022         IGB_CORE_UNLOCK(adapter);
6023         return (0);
6024 }
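/*
 * Usage sketch (node name assumed): a non-zero write sets
 * eee_disable and reinitializes the interface, so EEE ends up
 * disabled; writing 0 re-enables it:
 *
 *	# sysctl dev.igb.0.eee_disabled=1
 */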