/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.3.9 - 8";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#if __FreeBSD_version >= 800000
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static bool     igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, struct tcphdr *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        DEVMETHOD_END
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
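
/*
 * Usage sketch (not part of this file): built as a module the driver
 * ships as if_igb.ko, so it can be loaded with "kldload if_igb" or at
 * boot via if_igb_load="YES" in /boot/loader.conf.
 */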

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
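
/*
 * Example (hypothetical values, not the defaults): being CTLFLAG_RDTUN,
 * these can only be set as loader tunables before attach, e.g. in
 * /boot/loader.conf:
 *
 *   hw.igb.rxd="2048"
 *   hw.igb.txd="2048"
 */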

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate is
** varied over time based on the traffic
** seen on that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");
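
/*
 * Sketch of runtime use (hypothetical): unlike the RDTUN knobs in this
 * file, enable_aim is CTLFLAG_RW, so it can be flipped on a running
 * system, e.g. "sysctl hw.igb.enable_aim=0".
 */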

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
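
/*
 * Worked example (a rough reading of the knob above): capping at the
 * default of 8000 interrupts per second corresponds to a minimum spacing
 * of 1/8000 s = 125 us between interrupts on a vector.
 */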

#if __FreeBSD_version >= 800000
/*
** Tunable number of buffers in the buf_ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload,
** which can have memory alignment benefits. Another
** plus is that small packets often fit entirely
** in the header mbuf and thus need no cluster. It is
** a very workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");
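
/*
 * Example (hypothetical value): hw.igb.num_queues="2" in /boot/loader.conf
 * pins the driver to two queue pairs regardless of the CPU count.
 */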

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a CPU.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate number of transmit and receive descriptors. It
         * must not exceed hardware maximum, and must be a multiple
         * of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;
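        /*
         * Worked example (assuming an IGB_DBA_ALIGN of 128 and 16-byte
         * descriptors): a 1024-entry ring occupies 1024 * 16 = 16384
         * bytes, a multiple of 128, so it passes the alignment check
         * above.
         */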

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard Ethernet-sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats =
                    (struct e1000_vf_stats *)malloc(sizeof \
                    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats =
                    (struct e1000_hw_stats *)malloc(sizeof \
                    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Can not allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper)
                        e1000_set_eee_i350(&adapter->hw);
        }

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it is a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#if __FreeBSD_version >= 800000
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#if __FreeBSD_version < 800000

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX routine: called from the stack, it
 * always uses the first ring and spins for its lock.
 * It should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* __FreeBSD_version >= 800000 */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}
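
/*
 * Worked example of the queue selection above (hypothetical numbers):
 * with num_queues == 4, an mbuf carrying flowid 10 lands on
 * tx_rings[10 % 4] == tx_rings[2]; an mbuf without M_FLOWID set falls
 * back to the sending CPU's index, curcpu % 4.
 */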

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *buf;
        int             err = 0, enq = 0;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
        buf = drbr_dequeue(ifp, txr->br);
        while (buf != NULL) {
                if ((err = igb_xmit(txr, &buf)) != 0) {
                        if (buf != NULL)
                                err = drbr_enqueue(ifp, txr->br, buf);
                        break;
                }
                enq++;
                ifp->if_obytes += buf->m_pkthdr.len;
                if (buf->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, buf);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                buf = drbr_dequeue(ifp, txr->br);
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
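        /*
         * Worked example for the bound above: with max_frame_size 9234,
         * the largest MTU accepted is 9234 - ETHER_HDR_LEN (14) -
         * ETHER_CRC_LEN (4) = 9216 bytes.
         */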
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it as the init
 *  entry point in the network interface structure, and the driver
 *  itself uses it as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
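        /*
         * Example of the selection above: a standard 1500-byte MTU gives
         * max_frame_size 1518, which fits a 2KB cluster (MCLBYTES), while
         * a 9000-byte jumbo MTU needs 9KB clusters (MJUM9BYTES).
         */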
1309
1310         /* Prepare receive descriptors and buffers */
1311         if (igb_setup_receive_structures(adapter)) {
1312                 device_printf(dev, "Could not setup receive structures\n");
1313                 return;
1314         }
1315         igb_initialize_receive_units(adapter);
1316
1317         /* Enable VLAN support */
1318         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1319                 igb_setup_vlan_hw_support(adapter);
1320                                 
1321         /* Don't lose promiscuous settings */
1322         igb_set_promisc(adapter);
1323
1324         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1325         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1326
1327         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1328         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1329
1330         if (adapter->msix > 1) /* Set up queue routing */
1331                 igb_configure_queues(adapter);
1332
1333         /* this clears any pending interrupts */
1334         E1000_READ_REG(&adapter->hw, E1000_ICR);
1335 #ifdef DEVICE_POLLING
1336         /*
1337          * Only enable interrupts if we are not polling, make sure
1338          * they are off otherwise.
1339          */
1340         if (ifp->if_capenable & IFCAP_POLLING)
1341                 igb_disable_intr(adapter);
1342         else
1343 #endif /* DEVICE_POLLING */
1344         {
1345                 igb_enable_intr(adapter);
1346                 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1347         }
1348
1349         /* Set Energy Efficient Ethernet */
1350         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1351                 e1000_set_eee_i350(&adapter->hw);
1352 }
1353
1354 static void
1355 igb_init(void *arg)
1356 {
1357         struct adapter *adapter = arg;
1358
1359         IGB_CORE_LOCK(adapter);
1360         igb_init_locked(adapter);
1361         IGB_CORE_UNLOCK(adapter);
1362 }
1363
1364
1365 static void
1366 igb_handle_que(void *context, int pending)
1367 {
1368         struct igb_queue *que = context;
1369         struct adapter *adapter = que->adapter;
1370         struct tx_ring *txr = que->txr;
1371         struct ifnet    *ifp = adapter->ifp;
1372
1373         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1374                 bool    more;
1375
1376                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1377
1378                 IGB_TX_LOCK(txr);
1379                 igb_txeof(txr);
1380 #if __FreeBSD_version >= 800000
1381                 /* Process the stack queue only if not depleted */
1382                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1383                     !drbr_empty(ifp, txr->br))
1384                         igb_mq_start_locked(ifp, txr);
1385 #else
1386                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1387                         igb_start_locked(txr, ifp);
1388 #endif
1389                 IGB_TX_UNLOCK(txr);
1390                 /* Do we need another? */
1391                 if (more) {
1392                         taskqueue_enqueue(que->tq, &que->que_task);
1393                         return;
1394                 }
1395         }
1396
1397 #ifdef DEVICE_POLLING
1398         if (ifp->if_capenable & IFCAP_POLLING)
1399                 return;
1400 #endif
1401         /* Reenable this interrupt */
1402         if (que->eims)
1403                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1404         else
1405                 igb_enable_intr(adapter);
1406 }
1407
1408 /* Deal with link in a sleepable context */
1409 static void
1410 igb_handle_link(void *context, int pending)
1411 {
1412         struct adapter *adapter = context;
1413
1414         IGB_CORE_LOCK(adapter);
1415         igb_handle_link_locked(adapter);
1416         IGB_CORE_UNLOCK(adapter);
1417 }
1418
1419 static void
1420 igb_handle_link_locked(struct adapter *adapter)
1421 {
1422         struct tx_ring  *txr = adapter->tx_rings;
1423         struct ifnet *ifp = adapter->ifp;
1424
1425         IGB_CORE_LOCK_ASSERT(adapter);
1426         adapter->hw.mac.get_link_status = 1;
1427         igb_update_link_status(adapter);
1428         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1429                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1430                         IGB_TX_LOCK(txr);
1431 #if __FreeBSD_version >= 800000
1432                         /* Process the stack queue only if not depleted */
1433                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1434                             !drbr_empty(ifp, txr->br))
1435                                 igb_mq_start_locked(ifp, txr);
1436 #else
1437                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1438                                 igb_start_locked(txr, ifp);
1439 #endif
1440                         IGB_TX_UNLOCK(txr);
1441                 }
1442         }
1443 }
1444
1445 /*********************************************************************
1446  *
1447  *  MSI/Legacy Deferred
1448  *  Interrupt Service routine  
1449  *
1450  *********************************************************************/
1451 static int
1452 igb_irq_fast(void *arg)
1453 {
1454         struct adapter          *adapter = arg;
1455         struct igb_queue        *que = adapter->queues;
1456         u32                     reg_icr;
1457
1458
1459         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1460
1461         /* Hot eject?  */
1462         if (reg_icr == 0xffffffff)
1463                 return FILTER_STRAY;
1464
1465         /* Definitely not our interrupt.  */
1466         if (reg_icr == 0x0)
1467                 return FILTER_STRAY;
1468
1469         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1470                 return FILTER_STRAY;
1471
1472         /*
1473          * Mask interrupts until the taskqueue is finished running.  This is
1474          * cheap, just assume that it is needed.  This also works around the
1475          * MSI message reordering errata on certain systems.
1476          */
1477         igb_disable_intr(adapter);
1478         taskqueue_enqueue(que->tq, &que->que_task);
1479
1480         /* Link status change */
1481         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1482                 taskqueue_enqueue(que->tq, &adapter->link_task);
1483
1484         if (reg_icr & E1000_ICR_RXO)
1485                 adapter->rx_overruns++;
1486         return FILTER_HANDLED;
1487 }
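
/*
 * Editor's note (illustrative, not from the original sources): igb_irq_fast()
 * is registered as an interrupt filter, so it runs in hard interrupt context
 * and does the minimum possible: read ICR, mask further interrupts, and punt
 * the real work to the taskqueue.  Returning FILTER_HANDLED tells the
 * interrupt framework the event was fully consumed in the filter, while
 * FILTER_STRAY disclaims it so shared-interrupt accounting can blame
 * another device.
 */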
1488
1489 #ifdef DEVICE_POLLING
1490 #if __FreeBSD_version >= 800000
1491 #define POLL_RETURN_COUNT(a) (a)
1492 static int
1493 #else
1494 #define POLL_RETURN_COUNT(a)
1495 static void
1496 #endif
1497 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1498 {
1499         struct adapter          *adapter = ifp->if_softc;
1500         struct igb_queue        *que;
1501         struct tx_ring          *txr;
1502         u32                     reg_icr, rx_done = 0;
1503         u32                     loop = IGB_MAX_LOOP;
1504         bool                    more;
1505
1506         IGB_CORE_LOCK(adapter);
1507         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1508                 IGB_CORE_UNLOCK(adapter);
1509                 return POLL_RETURN_COUNT(rx_done);
1510         }
1511
1512         if (cmd == POLL_AND_CHECK_STATUS) {
1513                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1514                 /* Link status change */
1515                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1516                         igb_handle_link_locked(adapter);
1517
1518                 if (reg_icr & E1000_ICR_RXO)
1519                         adapter->rx_overruns++;
1520         }
1521         IGB_CORE_UNLOCK(adapter);
1522
1523         for (int i = 0; i < adapter->num_queues; i++) {
1524                 que = &adapter->queues[i];
1525                 txr = que->txr;
1526
1527                 igb_rxeof(que, count, &rx_done);
1528
1529                 IGB_TX_LOCK(txr);
1530                 do {
1531                         more = igb_txeof(txr);
1532                 } while (loop-- && more);
1533 #if __FreeBSD_version >= 800000
1534                 if (!drbr_empty(ifp, txr->br))
1535                         igb_mq_start_locked(ifp, txr);
1536 #else
1537                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1538                         igb_start_locked(txr, ifp);
1539 #endif
1540                 IGB_TX_UNLOCK(txr);
1541         }
1542
1543         return POLL_RETURN_COUNT(rx_done);
1544 }
1545 #endif /* DEVICE_POLLING */
1546
1547 /*********************************************************************
1548  *
1549  *  MSIX Que Interrupt Service routine
1550  *
1551  **********************************************************************/
1552 static void
1553 igb_msix_que(void *arg)
1554 {
1555         struct igb_queue *que = arg;
1556         struct adapter *adapter = que->adapter;
1557         struct ifnet   *ifp = adapter->ifp;
1558         struct tx_ring *txr = que->txr;
1559         struct rx_ring *rxr = que->rxr;
1560         u32             newitr = 0;
1561         bool            more_rx;
1562
1563         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1564         ++que->irqs;
1565
1566         IGB_TX_LOCK(txr);
1567         igb_txeof(txr);
1568 #if __FreeBSD_version >= 800000
1569         /* Process the stack queue only if not depleted */
1570         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1571             !drbr_empty(ifp, txr->br))
1572                 igb_mq_start_locked(ifp, txr);
1573 #else
1574         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1575                 igb_start_locked(txr, ifp);
1576 #endif
1577         IGB_TX_UNLOCK(txr);
1578
1579         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1580
1581         if (adapter->enable_aim == FALSE)
1582                 goto no_calc;
1583         /*
1584         ** Do Adaptive Interrupt Moderation:
1585         **  - Write out last calculated setting
1586         **  - Calculate based on average size over
1587         **    the last interval.
1588         */
1589         if (que->eitr_setting)
1590                 E1000_WRITE_REG(&adapter->hw,
1591                     E1000_EITR(que->msix), que->eitr_setting);
1592  
1593         que->eitr_setting = 0;
1594
1595         /* Idle, do nothing */
1596         if ((txr->bytes == 0) && (rxr->bytes == 0))
1597                 goto no_calc;
1598                                 
1599         /* Use half the default if link is sub-gigabit */
1600         if (adapter->link_speed != 1000)
1601                 newitr = IGB_DEFAULT_ITR / 2;
1602         else {
1603                 if ((txr->bytes) && (txr->packets))
1604                         newitr = txr->bytes/txr->packets;
1605                 if ((rxr->bytes) && (rxr->packets))
1606                         newitr = max(newitr,
1607                             (rxr->bytes / rxr->packets));
1608                 newitr += 24; /* account for hardware frame, crc */
1609                 /* set an upper boundary */
1610                 newitr = min(newitr, 3000);
1611                 /* Be nice to the mid range */
1612                 if ((newitr > 300) && (newitr < 1200))
1613                         newitr = (newitr / 3);
1614                 else
1615                         newitr = (newitr / 2);
1616         }
1617         newitr &= 0x7FFC;  /* Mask invalid bits */
1618         if (adapter->hw.mac.type == e1000_82575)
1619                 newitr |= newitr << 16;
1620         else
1621                 newitr |= E1000_EITR_CNT_IGNR;
1622                  
1623         /* save for next interrupt */
1624         que->eitr_setting = newitr;
1625
1626         /* Reset state */
1627         txr->bytes = 0;
1628         txr->packets = 0;
1629         rxr->bytes = 0;
1630         rxr->packets = 0;
1631
1632 no_calc:
1633         /* Schedule a clean task if needed */
1634         if (more_rx)
1635                 taskqueue_enqueue(que->tq, &que->que_task);
1636         else
1637                 /* Reenable this interrupt */
1638                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1639         return;
1640 }
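
/*
 * Editor's sketch (not part of the driver): the AIM computation above reduced
 * to a pure function with a worked example, assuming a gigabit link and a
 * non-82575 MAC; the function name is hypothetical and the block is disabled
 * so it does not affect compilation.
 */
#if 0
static u32
igb_aim_example(u32 tx_bytes, u32 tx_packets, u32 rx_bytes, u32 rx_packets)
{
	u32 newitr = 0;

	if (tx_bytes && tx_packets)
		newitr = tx_bytes / tx_packets;	/* average TX frame size */
	if (rx_bytes && rx_packets)
		newitr = max(newitr, rx_bytes / rx_packets);
	newitr += 24;				/* hardware framing + CRC */
	newitr = min(newitr, 3000);		/* upper bound */
	if ((newitr > 300) && (newitr < 1200))
		newitr /= 3;			/* be nice to the mid range */
	else
		newitr /= 2;
	newitr &= 0x7FFC;			/* mask invalid bits */
	/*
	 * Example: 48000 TX bytes / 32 packets = 1500; RX idle; +24 = 1524;
	 * outside (300,1200) so 1524 / 2 = 762; 762 & 0x7FFC = 760.
	 */
	return (newitr | E1000_EITR_CNT_IGNR);
}
#endif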
1641
1642
1643 /*********************************************************************
1644  *
1645  *  MSIX Link Interrupt Service routine
1646  *
1647  **********************************************************************/
1648
1649 static void
1650 igb_msix_link(void *arg)
1651 {
1652         struct adapter  *adapter = arg;
1653         u32             icr;
1654
1655         ++adapter->link_irq;
1656         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1657         if (!(icr & E1000_ICR_LSC))
1658                 goto spurious;
1659         igb_handle_link(adapter, 0);
1660
1661 spurious:
1662         /* Rearm */
1663         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1664         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1665         return;
1666 }
1667
1668
1669 /*********************************************************************
1670  *
1671  *  Media Ioctl callback
1672  *
1673  *  This routine is called whenever the user queries the status of
1674  *  the interface using ifconfig.
1675  *
1676  **********************************************************************/
1677 static void
1678 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1679 {
1680         struct adapter *adapter = ifp->if_softc;
1681
1682         INIT_DEBUGOUT("igb_media_status: begin");
1683
1684         IGB_CORE_LOCK(adapter);
1685         igb_update_link_status(adapter);
1686
1687         ifmr->ifm_status = IFM_AVALID;
1688         ifmr->ifm_active = IFM_ETHER;
1689
1690         if (!adapter->link_active) {
1691                 IGB_CORE_UNLOCK(adapter);
1692                 return;
1693         }
1694
1695         ifmr->ifm_status |= IFM_ACTIVE;
1696
1697         switch (adapter->link_speed) {
1698         case 10:
1699                 ifmr->ifm_active |= IFM_10_T;
1700                 break;
1701         case 100:
1702                 /*
1703                 ** Support for 100Mb SFP - these are Fiber 
1704                 ** but the media type appears as serdes
1705                 */
1706                 if (adapter->hw.phy.media_type ==
1707                     e1000_media_type_internal_serdes)
1708                         ifmr->ifm_active |= IFM_100_FX;
1709                 else
1710                         ifmr->ifm_active |= IFM_100_TX;
1711                 break;
1712         case 1000:
1713                 ifmr->ifm_active |= IFM_1000_T;
1714                 break;
1715         }
1716
1717         if (adapter->link_duplex == FULL_DUPLEX)
1718                 ifmr->ifm_active |= IFM_FDX;
1719         else
1720                 ifmr->ifm_active |= IFM_HDX;
1721
1722         IGB_CORE_UNLOCK(adapter);
1723 }
1724
1725 /*********************************************************************
1726  *
1727  *  Media Ioctl callback
1728  *
1729  *  This routine is called when the user changes speed/duplex using
1730  *  media/mediaopt options with ifconfig.
1731  *
1732  **********************************************************************/
1733 static int
1734 igb_media_change(struct ifnet *ifp)
1735 {
1736         struct adapter *adapter = ifp->if_softc;
1737         struct ifmedia  *ifm = &adapter->media;
1738
1739         INIT_DEBUGOUT("igb_media_change: begin");
1740
1741         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1742                 return (EINVAL);
1743
1744         IGB_CORE_LOCK(adapter);
1745         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1746         case IFM_AUTO:
1747                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1748                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1749                 break;
1750         case IFM_1000_LX:
1751         case IFM_1000_SX:
1752         case IFM_1000_T:
1753                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1754                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1755                 break;
1756         case IFM_100_TX:
1757                 adapter->hw.mac.autoneg = FALSE;
1758                 adapter->hw.phy.autoneg_advertised = 0;
1759                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1760                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1761                 else
1762                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1763                 break;
1764         case IFM_10_T:
1765                 adapter->hw.mac.autoneg = FALSE;
1766                 adapter->hw.phy.autoneg_advertised = 0;
1767                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1768                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1769                 else
1770                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1771                 break;
1772         default:
1773                 device_printf(adapter->dev, "Unsupported media type\n");
1774         }
1775
1776         igb_init_locked(adapter);
1777         IGB_CORE_UNLOCK(adapter);
1778
1779         return (0);
1780 }
1781
1782
1783 /*********************************************************************
1784  *
1785  *  This routine maps the mbufs to Advanced TX descriptors.
1786  *  
1787  **********************************************************************/
1788 static int
1789 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1790 {
1791         struct adapter          *adapter = txr->adapter;
1792         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1793         bus_dmamap_t            map;
1794         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1795         union e1000_adv_tx_desc *txd = NULL;
1796         struct mbuf             *m_head = *m_headp;
1797         struct ether_vlan_header *eh = NULL;
1798         struct ip               *ip = NULL;
1799         struct tcphdr           *th = NULL;
1800         u32                     hdrlen, cmd_type_len, olinfo_status = 0;
1801         int                     ehdrlen, poff;
1802         int                     nsegs, i, first, last = 0;
1803         int                     error, do_tso, remap = 1;
1804
1805         /* Set basic descriptor constants */
1806         cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1807         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1808         if (m_head->m_flags & M_VLANTAG)
1809                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1810
1811 retry:
1812         m_head = *m_headp;
1813         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1814         hdrlen = ehdrlen = poff = 0;
1815
1816         /*
1817          * Intel recommends that the entire IP/TCP header reside in a single
1818          * buffer. If multiple descriptors are used to describe the IP and
1819          * TCP header, each descriptor should describe one or more
1820          * complete headers; descriptors referencing only parts of headers
1821          * are not supported. If all layer headers are not coalesced into
1822          * a single buffer, each buffer should not cross a 4KB boundary,
1823          * or be larger than the maximum read request size.
1824          * The controller also requires modifying the IP/TCP header to make
1825          * TSO work, so we first get a writable mbuf chain and then coalesce
1826          * the ethernet/IP/TCP headers into a single buffer to meet the
1827          * controller's requirement. This also simplifies IP/TCP/UDP
1828          * checksum offloading, which has similar restrictions.
1829          */
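        /*
         * Editor's example (added for clarity): for an untagged IPv4/TCP
         * frame with no IP or TCP options, the computations below yield
         * ehdrlen = 14 (Ethernet), poff = 14 + 20 = 34 (start of TCP) and,
         * for TSO, hdrlen = 34 + 20 = 54; an 802.1Q tag adds 4 to each.
         */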
1830         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1831                 if (do_tso || (m_head->m_next != NULL && 
1832                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1833                         if (M_WRITABLE(*m_headp) == 0) {
1834                                 m_head = m_dup(*m_headp, M_NOWAIT);
1835                                 m_freem(*m_headp);
1836                                 if (m_head == NULL) {
1837                                         *m_headp = NULL;
1838                                         return (ENOBUFS);
1839                                 }
1840                                 *m_headp = m_head;
1841                         }
1842                 }
1843                 /*
1844                  * Assume IPv4, we don't have TSO/checksum offload support
1845                  * for IPv6 yet.
1846                  */
1847                 ehdrlen = sizeof(struct ether_header);
1848                 m_head = m_pullup(m_head, ehdrlen);
1849                 if (m_head == NULL) {
1850                         *m_headp = NULL;
1851                         return (ENOBUFS);
1852                 }
1853                 eh = mtod(m_head, struct ether_vlan_header *);
1854                 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1855                         ehdrlen = sizeof(struct ether_vlan_header);
1856                         m_head = m_pullup(m_head, ehdrlen);
1857                         if (m_head == NULL) {
1858                                 *m_headp = NULL;
1859                                 return (ENOBUFS);
1860                         }
1861                 }
1862                 m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1863                 if (m_head == NULL) {
1864                         *m_headp = NULL;
1865                         return (ENOBUFS);
1866                 }
1867                 ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1868                 poff = ehdrlen + (ip->ip_hl << 2);
1869                 if (do_tso) {
1870                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1871                         if (m_head == NULL) {
1872                                 *m_headp = NULL;
1873                                 return (ENOBUFS);
1874                         }
1875                         /*
1876                          * The pseudo TCP checksum does not include the TCP
1877                          * payload length, so the driver must recompute the
1878                          * checksum here to match what the hardware expects,
1879                          * in adherence to Microsoft's Large Send specification.
1880                          */
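                        /*
                         * Editor's note: in_pseudo() folds only the source
                         * address, destination address and protocol into
                         * th_sum; the TCP length term of the RFC 793 pseudo
                         * header is deliberately omitted, as the hardware
                         * inserts each segment's length while splitting the
                         * TSO payload.
                         */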
1881                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1882                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
1883                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1884                         /* Keep track of the full header length */
1885                         hdrlen = poff + (th->th_off << 2);
1886                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1887                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888                         if (m_head == NULL) {
1889                                 *m_headp = NULL;
1890                                 return (ENOBUFS);
1891                         }
1892                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893                         m_head = m_pullup(m_head, poff + (th->th_off << 2));
1894                         if (m_head == NULL) {
1895                                 *m_headp = NULL;
1896                                 return (ENOBUFS);
1897                         }
1898                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1899                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1900                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1901                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1902                         if (m_head == NULL) {
1903                                 *m_headp = NULL;
1904                                 return (ENOBUFS);
1905                         }
1906                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1907                 }
1908                 *m_headp = m_head;
1909         }
1910
1911         /*
1912          * Map the packet for DMA
1913          *
1914          * Capture the first descriptor index,
1915          * this descriptor will have the index
1916          * of the EOP which is the only one that
1917          * now gets a DONE bit writeback.
1918          */
1919         first = txr->next_avail_desc;
1920         tx_buffer = &txr->tx_buffers[first];
1921         tx_buffer_mapped = tx_buffer;
1922         map = tx_buffer->map;
1923
1924         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1925             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1926
1927         /*
1928          * There are two types of errors we can (try) to handle:
1929          * - EFBIG means the mbuf chain was too long and bus_dma ran
1930          *   out of segments.  Defragment the mbuf chain and try again.
1931          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1932          *   at this point in time.  Defer sending and try again later.
1933          * All other errors, in particular EINVAL, are fatal and prevent the
1934          * mbuf chain from ever going through.  Drop it and report error.
1935          */
1936         if (error == EFBIG && remap) {
1937                 struct mbuf *m;
1938
1939                 m = m_defrag(*m_headp, M_NOWAIT);
1940                 if (m == NULL) {
1941                         adapter->mbuf_defrag_failed++;
1942                         m_freem(*m_headp);
1943                         *m_headp = NULL;
1944                         return (ENOBUFS);
1945                 }
1946                 *m_headp = m;
1947
1948                 /* Try it again, but only once */
1949                 remap = 0;
1950                 goto retry;
1951         } else if (error == ENOMEM) {
1952                 adapter->no_tx_dma_setup++;
1953                 return (error);
1954         } else if (error != 0) {
1955                 adapter->no_tx_dma_setup++;
1956                 m_freem(*m_headp);
1957                 *m_headp = NULL;
1958                 return (error);
1959         }
1960
1961         /*
1962         ** Make sure we don't overrun the ring;
1963         ** we need nsegs descriptors plus one
1964         ** for the context descriptor used for
1965         ** the offloads.
1966         */
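        /*
         * Editor's example: with nsegs = 8 the test below needs 9 free
         * descriptors beyond the 2 held in reserve, so tx_avail = 10
         * fails (9 > 8) while tx_avail = 11 passes.
         */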
1967         if ((nsegs + 1) > (txr->tx_avail - 2)) {
1968                 txr->no_desc_avail++;
1969                 bus_dmamap_unload(txr->txtag, map);
1970                 return (ENOBUFS);
1971         }
1972         m_head = *m_headp;
1973
1974         /* Do hardware assists:
1975          * Set up the context descriptor, used
1976          * when any hardware offload is done.
1977          * This includes CSUM, VLAN, and TSO.
1978          * It will use the first descriptor.
1979          */
1980
1981         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1982                 if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1983                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1984                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1985                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1986                 } else
1987                         return (ENXIO);
1988         } else if (igb_tx_ctx_setup(txr, m_head))
1989                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1990
1991         /* Calculate payload length */
1992         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1993             << E1000_ADVTXD_PAYLEN_SHIFT);
1994
1995         /* 82575 needs the queue index added */
1996         if (adapter->hw.mac.type == e1000_82575)
1997                 olinfo_status |= txr->me << 4;
1998
1999         /* Set up our transmit descriptors */
2000         i = txr->next_avail_desc;
2001         for (int j = 0; j < nsegs; j++) {
2002                 bus_size_t seg_len;
2003                 bus_addr_t seg_addr;
2004
2005                 tx_buffer = &txr->tx_buffers[i];
2006                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2007                 seg_addr = segs[j].ds_addr;
2008                 seg_len  = segs[j].ds_len;
2009
2010                 txd->read.buffer_addr = htole64(seg_addr);
2011                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2012                 txd->read.olinfo_status = htole32(olinfo_status);
2013                 last = i;
2014                 if (++i == adapter->num_tx_desc)
2015                         i = 0;
2016                 tx_buffer->m_head = NULL;
2017                 tx_buffer->next_eop = -1;
2018         }
2019
2020         txr->next_avail_desc = i;
2021         txr->tx_avail -= nsegs;
2022         tx_buffer->m_head = m_head;
2023
2024         /*
2025         ** Here we swap the maps so the last descriptor,
2026         ** which gets the completion interrupt, has the
2027         ** real map, and the first descriptor gets the
2028         ** unused map from this descriptor.
2029         */
2030         tx_buffer_mapped->map = tx_buffer->map;
2031         tx_buffer->map = map;
2032         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2033
2034         /*
2035          * Last Descriptor of Packet
2036          * needs End Of Packet (EOP)
2037          * and Report Status (RS)
2038          */
2039         txd->read.cmd_type_len |=
2040             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2041         /*
2042          * Keep track in the first buffer which
2043          * descriptor will be written back
2044          */
2045         tx_buffer = &txr->tx_buffers[first];
2046         tx_buffer->next_eop = last;
2047         /* Update the watchdog time early and often */
2048         txr->watchdog_time = ticks;
2049
2050         /*
2051          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2052          * that this frame is available to transmit.
2053          */
2054         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2055             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2056         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2057         ++txr->tx_packets;
2058
2059         return (0);
2060 }
2061 static void
2062 igb_set_promisc(struct adapter *adapter)
2063 {
2064         struct ifnet    *ifp = adapter->ifp;
2065         struct e1000_hw *hw = &adapter->hw;
2066         u32             reg;
2067
2068         if (adapter->vf_ifp) {
2069                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2070                 return;
2071         }
2072
2073         reg = E1000_READ_REG(hw, E1000_RCTL);
2074         if (ifp->if_flags & IFF_PROMISC) {
2075                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2076                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2077         } else if (ifp->if_flags & IFF_ALLMULTI) {
2078                 reg |= E1000_RCTL_MPE;
2079                 reg &= ~E1000_RCTL_UPE;
2080                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2081         }
2082 }
2083
2084 static void
2085 igb_disable_promisc(struct adapter *adapter)
2086 {
2087         struct e1000_hw *hw = &adapter->hw;
2088         u32             reg;
2089
2090         if (adapter->vf_ifp) {
2091                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2092                 return;
2093         }
2094         reg = E1000_READ_REG(hw, E1000_RCTL);
2095         reg &= ~E1000_RCTL_UPE;
2096         reg &= ~E1000_RCTL_MPE;
2097         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2098 }
2099
2100
2101 /*********************************************************************
2102  *  Multicast Update
2103  *
2104  *  This routine is called whenever multicast address list is updated.
2105  *
2106  **********************************************************************/
2107
2108 static void
2109 igb_set_multi(struct adapter *adapter)
2110 {
2111         struct ifnet    *ifp = adapter->ifp;
2112         struct ifmultiaddr *ifma;
2113         u32 reg_rctl = 0;
2114         u8  *mta;
2115         int mcnt = 0;
2116
2117
2118         IOCTL_DEBUGOUT("igb_set_multi: begin");
2119
2120         mta = adapter->mta;
2121         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2122             MAX_NUM_MULTICAST_ADDRESSES);
2123
2124 #if __FreeBSD_version < 800000
2125         IF_ADDR_LOCK(ifp);
2126 #else
2127         if_maddr_rlock(ifp);
2128 #endif
2129         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2130                 if (ifma->ifma_addr->sa_family != AF_LINK)
2131                         continue;
2132
2133                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2134                         break;
2135
2136                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2137                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2138                 mcnt++;
2139         }
2140 #if __FreeBSD_version < 800000
2141         IF_ADDR_UNLOCK(ifp);
2142 #else
2143         if_maddr_runlock(ifp);
2144 #endif
2145
2146         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2147                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2148                 reg_rctl |= E1000_RCTL_MPE;
2149                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2150         } else
2151                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2152 }
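
/*
 * Editor's note: when the list overflows the hardware's exact-match filter
 * table (MAX_NUM_MULTICAST_ADDRESSES entries) the code above falls back to
 * E1000_RCTL_MPE, accepting all multicast frames rather than silently
 * dropping the groups beyond the limit.
 */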
2153
2154
2155 /*********************************************************************
2156  *  Timer routine:
2157  *      This routine checks for link status,
2158  *      updates statistics, and does the watchdog.
2159  *
2160  **********************************************************************/
2161
2162 static void
2163 igb_local_timer(void *arg)
2164 {
2165         struct adapter          *adapter = arg;
2166         device_t                dev = adapter->dev;
2167         struct ifnet            *ifp = adapter->ifp;
2168         struct tx_ring          *txr = adapter->tx_rings;
2169         struct igb_queue        *que = adapter->queues;
2170         int                     hung = 0, busy = 0;
2171
2172
2173         IGB_CORE_LOCK_ASSERT(adapter);
2174
2175         igb_update_link_status(adapter);
2176         igb_update_stats_counters(adapter);
2177
2178         /*
2179         ** Check the TX queues status
2180         **      - central locked handling of OACTIVE
2181         **      - watchdog only if all queues show hung
2182         */
2183         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2184                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2185                     (adapter->pause_frames == 0))
2186                         ++hung;
2187                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2188                         ++busy;
2189                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2190                         taskqueue_enqueue(que->tq, &que->que_task);
2191         }
2192         if (hung == adapter->num_queues)
2193                 goto timeout;
2194         if (busy == adapter->num_queues)
2195                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2196         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2197             (busy < adapter->num_queues))
2198                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2199
2200         adapter->pause_frames = 0;
2201         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2202 #ifndef DEVICE_POLLING
2203         /* Schedule all queue interrupts - deadlock protection */
2204         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2205 #endif
2206         return;
2207
2208 timeout:
2209         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2210         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2211             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2212             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2213         device_printf(dev, "TX(%d) desc avail = %d, "
2214             "Next TX to Clean = %d\n",
2215             txr->me, txr->tx_avail, txr->next_to_clean);
2216         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2217         adapter->watchdog_events++;
2218         igb_init_locked(adapter);
2219 }
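
/*
 * Editor's note: a queue is counted as hung above only when no pause frames
 * were seen during the interval, so a link partner asserting flow control
 * cannot trip the watchdog; the reset path fires only once every queue
 * reports IGB_QUEUE_HUNG.
 */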
2220
2221 static void
2222 igb_update_link_status(struct adapter *adapter)
2223 {
2224         struct e1000_hw         *hw = &adapter->hw;
2225         struct e1000_fc_info    *fc = &hw->fc;
2226         struct ifnet            *ifp = adapter->ifp;
2227         device_t                dev = adapter->dev;
2228         struct tx_ring          *txr = adapter->tx_rings;
2229         u32                     link_check, thstat, ctrl;
2230         char                    *flowctl = NULL;
2231
2232         link_check = thstat = ctrl = 0;
2233
2234         /* Get the cached link value or read for real */
2235         switch (hw->phy.media_type) {
2236         case e1000_media_type_copper:
2237                 if (hw->mac.get_link_status) {
2238                         /* Do the work to read phy */
2239                         e1000_check_for_link(hw);
2240                         link_check = !hw->mac.get_link_status;
2241                 } else
2242                         link_check = TRUE;
2243                 break;
2244         case e1000_media_type_fiber:
2245                 e1000_check_for_link(hw);
2246                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2247                                  E1000_STATUS_LU);
2248                 break;
2249         case e1000_media_type_internal_serdes:
2250                 e1000_check_for_link(hw);
2251                 link_check = adapter->hw.mac.serdes_has_link;
2252                 break;
2253         /* VF device is type_unknown */
2254         case e1000_media_type_unknown:
2255                 e1000_check_for_link(hw);
2256                 link_check = !hw->mac.get_link_status;
2257                 /* Fall thru */
2258         default:
2259                 break;
2260         }
2261
2262         /* Check for thermal downshift or shutdown */
2263         if (hw->mac.type == e1000_i350) {
2264                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2265                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2266         }
2267
2268         /* Get the flow control for display */
2269         switch (fc->current_mode) {
2270         case e1000_fc_rx_pause:
2271                 flowctl = "RX";
2272                 break;  
2273         case e1000_fc_tx_pause:
2274                 flowctl = "TX";
2275                 break;  
2276         case e1000_fc_full:
2277                 flowctl = "Full";
2278                 break;  
2279         case e1000_fc_none:
2280         default:
2281                 flowctl = "None";
2282                 break;  
2283         }
2284
2285         /* Now we check if a transition has happened */
2286         if (link_check && (adapter->link_active == 0)) {
2287                 e1000_get_speed_and_duplex(&adapter->hw, 
2288                     &adapter->link_speed, &adapter->link_duplex);
2289                 if (bootverbose)
2290                         device_printf(dev, "Link is up %d Mbps %s,"
2291                             " Flow Control: %s\n",
2292                             adapter->link_speed,
2293                             ((adapter->link_duplex == FULL_DUPLEX) ?
2294                             "Full Duplex" : "Half Duplex"), flowctl);
2295                 adapter->link_active = 1;
2296                 ifp->if_baudrate = adapter->link_speed * 1000000;
2297                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2298                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2299                         device_printf(dev, "Link: thermal downshift\n");
2300                 /* This can sleep */
2301                 if_link_state_change(ifp, LINK_STATE_UP);
2302         } else if (!link_check && (adapter->link_active == 1)) {
2303                 ifp->if_baudrate = adapter->link_speed = 0;
2304                 adapter->link_duplex = 0;
2305                 if (bootverbose)
2306                         device_printf(dev, "Link is Down\n");
2307                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2308                     (thstat & E1000_THSTAT_PWR_DOWN))
2309                         device_printf(dev, "Link: thermal shutdown\n");
2310                 adapter->link_active = 0;
2311                 /* This can sleep */
2312                 if_link_state_change(ifp, LINK_STATE_DOWN);
2313                 /* Reset queue state */
2314                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2315                         txr->queue_status = IGB_QUEUE_IDLE;
2316         }
2317 }
2318
2319 /*********************************************************************
2320  *
2321  *  This routine disables all traffic on the adapter by issuing a
2322  *  global reset on the MAC and deallocates TX/RX buffers.
2323  *
2324  **********************************************************************/
2325
2326 static void
2327 igb_stop(void *arg)
2328 {
2329         struct adapter  *adapter = arg;
2330         struct ifnet    *ifp = adapter->ifp;
2331         struct tx_ring *txr = adapter->tx_rings;
2332
2333         IGB_CORE_LOCK_ASSERT(adapter);
2334
2335         INIT_DEBUGOUT("igb_stop: begin");
2336
2337         igb_disable_intr(adapter);
2338
2339         callout_stop(&adapter->timer);
2340
2341         /* Tell the stack that the interface is no longer active */
2342         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2343         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2344
2345         /* Disarm watchdog timer. */
2346         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2347                 IGB_TX_LOCK(txr);
2348                 txr->queue_status = IGB_QUEUE_IDLE;
2349                 IGB_TX_UNLOCK(txr);
2350         }
2351
2352         e1000_reset_hw(&adapter->hw);
2353         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2354
2355         e1000_led_off(&adapter->hw);
2356         e1000_cleanup_led(&adapter->hw);
2357 }
2358
2359
2360 /*********************************************************************
2361  *
2362  *  Determine hardware revision.
2363  *
2364  **********************************************************************/
2365 static void
2366 igb_identify_hardware(struct adapter *adapter)
2367 {
2368         device_t dev = adapter->dev;
2369
2370         /* Make sure our PCI config space has the necessary stuff set */
2371         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2372         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2373             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2374                 INIT_DEBUGOUT("Memory Access and/or Bus Master "
2375                     "bits were not set!\n");
2376                 adapter->hw.bus.pci_cmd_word |=
2377                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2378                 pci_write_config(dev, PCIR_COMMAND,
2379                     adapter->hw.bus.pci_cmd_word, 2);
2380         }
2381
2382         /* Save off the information about this board */
2383         adapter->hw.vendor_id = pci_get_vendor(dev);
2384         adapter->hw.device_id = pci_get_device(dev);
2385         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2386         adapter->hw.subsystem_vendor_id =
2387             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2388         adapter->hw.subsystem_device_id =
2389             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2390
2391         /* Set MAC type early for PCI setup */
2392         e1000_set_mac_type(&adapter->hw);
2393
2394         /* Are we a VF device? */
2395         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2396             (adapter->hw.mac.type == e1000_vfadapt_i350))
2397                 adapter->vf_ifp = 1;
2398         else
2399                 adapter->vf_ifp = 0;
2400 }
2401
2402 static int
2403 igb_allocate_pci_resources(struct adapter *adapter)
2404 {
2405         device_t        dev = adapter->dev;
2406         int             rid;
2407
2408         rid = PCIR_BAR(0);
2409         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2410             &rid, RF_ACTIVE);
2411         if (adapter->pci_mem == NULL) {
2412                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2413                 return (ENXIO);
2414         }
2415         adapter->osdep.mem_bus_space_tag =
2416             rman_get_bustag(adapter->pci_mem);
2417         adapter->osdep.mem_bus_space_handle =
2418             rman_get_bushandle(adapter->pci_mem);
2419         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2420
2421         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2422
2423         /* This will setup either MSI/X or MSI */
2424         adapter->msix = igb_setup_msix(adapter);
2425         adapter->hw.back = &adapter->osdep;
2426
2427         return (0);
2428 }
2429
2430 /*********************************************************************
2431  *
2432  *  Setup the Legacy or MSI Interrupt handler
2433  *
2434  **********************************************************************/
2435 static int
2436 igb_allocate_legacy(struct adapter *adapter)
2437 {
2438         device_t                dev = adapter->dev;
2439         struct igb_queue        *que = adapter->queues;
2440         struct tx_ring          *txr = adapter->tx_rings;
2441         int                     error, rid = 0;
2442
2443         /* Turn off all interrupts */
2444         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2445
2446         /* MSI RID is 1 */
2447         if (adapter->msix == 1)
2448                 rid = 1;
2449
2450         /* We allocate a single interrupt resource */
2451         adapter->res = bus_alloc_resource_any(dev,
2452             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2453         if (adapter->res == NULL) {
2454                 device_printf(dev, "Unable to allocate bus resource: "
2455                     "interrupt\n");
2456                 return (ENXIO);
2457         }
2458
2459 #if __FreeBSD_version >= 800000
2460         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2461 #endif
2462
2463         /*
2464          * Try allocating a fast interrupt and the associated deferred
2465          * processing contexts.
2466          */
2467         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2468         /* Make tasklet for deferred link handling */
2469         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2470         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2471             taskqueue_thread_enqueue, &que->tq);
2472         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2473             device_get_nameunit(adapter->dev));
2474         if ((error = bus_setup_intr(dev, adapter->res,
2475             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2476             adapter, &adapter->tag)) != 0) {
2477                 device_printf(dev, "Failed to register fast interrupt "
2478                             "handler: %d\n", error);
2479                 taskqueue_free(que->tq);
2480                 que->tq = NULL;
2481                 return (error);
2482         }
2483
2484         return (0);
2485 }
2486
2487
2488 /*********************************************************************
2489  *
2490  *  Setup the MSIX Queue Interrupt handlers: 
2491  *
2492  **********************************************************************/
2493 static int
2494 igb_allocate_msix(struct adapter *adapter)
2495 {
2496         device_t                dev = adapter->dev;
2497         struct igb_queue        *que = adapter->queues;
2498         int                     error, rid, vector = 0;
2499
2500         /* Be sure to start with all interrupts disabled */
2501         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2502         E1000_WRITE_FLUSH(&adapter->hw);
2503
2504         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2505                 rid = vector + 1;
2506                 que->res = bus_alloc_resource_any(dev,
2507                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2508                 if (que->res == NULL) {
2509                         device_printf(dev,
2510                             "Unable to allocate bus resource: "
2511                             "MSIX Queue Interrupt\n");
2512                         return (ENXIO);
2513                 }
2514                 error = bus_setup_intr(dev, que->res,
2515                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2516                     igb_msix_que, que, &que->tag);
2517                 if (error) {
2518                         que->res = NULL;
2519                         device_printf(dev, "Failed to register Queue handler");
2520                         return (error);
2521                 }
2522 #if __FreeBSD_version >= 800504
2523                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2524 #endif
2525                 que->msix = vector;
2526                 if (adapter->hw.mac.type == e1000_82575)
2527                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2528                 else
2529                         que->eims = 1 << vector;
2530                 /*
2531                 ** Bind the MSI-X vector, and thus the
2532                 ** rings, to the corresponding CPU.
2533                 */
2534                 if (adapter->num_queues > 1) {
2535                         if (igb_last_bind_cpu < 0)
2536                                 igb_last_bind_cpu = CPU_FIRST();
2537                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2538                         device_printf(dev,
2539                                 "Bound queue %d to cpu %d\n",
2540                                 i,igb_last_bind_cpu);
2541                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2542                 }
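                /*
                 * Editor's example: with 4 queues on an 8-CPU system the
                 * loop binds vectors round-robin to cpu0..cpu3 via
                 * CPU_FIRST()/CPU_NEXT(); igb_last_bind_cpu is file-scope
                 * state, so a second igb(4) instance continues from cpu4.
                 */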
2543 #if __FreeBSD_version >= 800000
2544                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2545                     que->txr);
2546 #endif
2547                 /* Make tasklet for deferred handling */
2548                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2549                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2550                     taskqueue_thread_enqueue, &que->tq);
2551                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2552                     device_get_nameunit(adapter->dev));
2553         }
2554
2555         /* And Link */
2556         rid = vector + 1;
2557         adapter->res = bus_alloc_resource_any(dev,
2558             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2559         if (adapter->res == NULL) {
2560                 device_printf(dev,
2561                     "Unable to allocate bus resource: "
2562                     "MSIX Link Interrupt\n");
2563                 return (ENXIO);
2564         }
2565         if ((error = bus_setup_intr(dev, adapter->res,
2566             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2567             igb_msix_link, adapter, &adapter->tag)) != 0) {
2568                 device_printf(dev, "Failed to register Link handler");
2569                 return (error);
2570         }
2571 #if __FreeBSD_version >= 800504
2572         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2573 #endif
2574         adapter->linkvec = vector;
2575
2576         return (0);
2577 }
2578
2579
2580 static void
2581 igb_configure_queues(struct adapter *adapter)
2582 {
2583         struct  e1000_hw        *hw = &adapter->hw;
2584         struct  igb_queue       *que;
2585         u32                     tmp, ivar = 0, newitr = 0;
2586
2587         /* First turn on RSS capability */
2588         if (adapter->hw.mac.type != e1000_82575)
2589                 E1000_WRITE_REG(hw, E1000_GPIE,
2590                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2591                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2592
2593         /* Turn on MSIX */
2594         switch (adapter->hw.mac.type) {
2595         case e1000_82580:
2596         case e1000_i350:
2597         case e1000_i210:
2598         case e1000_i211:
2599         case e1000_vfadapt:
2600         case e1000_vfadapt_i350:
2601                 /* RX entries */
2602                 for (int i = 0; i < adapter->num_queues; i++) {
2603                         u32 index = i >> 1;
2604                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2605                         que = &adapter->queues[i];
2606                         if (i & 1) {
2607                                 ivar &= 0xFF00FFFF;
2608                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2609                         } else {
2610                                 ivar &= 0xFFFFFF00;
2611                                 ivar |= que->msix | E1000_IVAR_VALID;
2612                         }
2613                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2614                 }
2615                 /* TX entries */
2616                 for (int i = 0; i < adapter->num_queues; i++) {
2617                         u32 index = i >> 1;
2618                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2619                         que = &adapter->queues[i];
2620                         if (i & 1) {
2621                                 ivar &= 0x00FFFFFF;
2622                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2623                         } else {
2624                                 ivar &= 0xFFFF00FF;
2625                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2626                         }
2627                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2628                         adapter->que_mask |= que->eims;
2629                 }
2630
2631                 /* And for the link interrupt */
2632                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2633                 adapter->link_mask = 1 << adapter->linkvec;
2634                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2635                 break;
2636         case e1000_82576:
2637                 /* RX entries */
2638                 for (int i = 0; i < adapter->num_queues; i++) {
2639                         u32 index = i & 0x7; /* Each IVAR has two entries */
2640                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2641                         que = &adapter->queues[i];
2642                         if (i < 8) {
2643                                 ivar &= 0xFFFFFF00;
2644                                 ivar |= que->msix | E1000_IVAR_VALID;
2645                         } else {
2646                                 ivar &= 0xFF00FFFF;
2647                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2648                         }
2649                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2650                         adapter->que_mask |= que->eims;
2651                 }
2652                 /* TX entries */
2653                 for (int i = 0; i < adapter->num_queues; i++) {
2654                         u32 index = i & 0x7; /* Each IVAR has two entries */
2655                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2656                         que = &adapter->queues[i];
2657                         if (i < 8) {
2658                                 ivar &= 0xFFFF00FF;
2659                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2660                         } else {
2661                                 ivar &= 0x00FFFFFF;
2662                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2663                         }
2664                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2665                         adapter->que_mask |= que->eims;
2666                 }
2667
2668                 /* And for the link interrupt */
2669                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2670                 adapter->link_mask = 1 << adapter->linkvec;
2671                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2672                 break;
2673
2674         case e1000_82575:
2675                 /* Enable MSI-X support */
2676                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2677                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2678                 /* Auto-Mask interrupts upon ICR read. */
2679                 tmp |= E1000_CTRL_EXT_EIAME;
2680                 tmp |= E1000_CTRL_EXT_IRCA;
2681                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2682
2683                 /* Queues */
2684                 for (int i = 0; i < adapter->num_queues; i++) {
2685                         que = &adapter->queues[i];
2686                         tmp = E1000_EICR_RX_QUEUE0 << i;
2687                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2688                         que->eims = tmp;
2689                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2690                             i, que->eims);
2691                         adapter->que_mask |= que->eims;
2692                 }
2693
2694                 /* Link */
2695                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2696                     E1000_EIMS_OTHER);
2697                 adapter->link_mask |= E1000_EIMS_OTHER;
2698         default:
2699                 break;
2700         }
2701
2702         /* Set the starting interrupt rate */
2703         if (igb_max_interrupt_rate > 0)
2704                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
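        /*
         * Editor's example: assuming the driver's usual default of 8000
         * interrupts/sec, this yields 4000000 / 8000 = 500, and
         * 500 & 0x7FFC = 500 since the low two bits are already clear.
         */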
2705
2706         if (hw->mac.type == e1000_82575)
2707                 newitr |= newitr << 16;
2708         else
2709                 newitr |= E1000_EITR_CNT_IGNR;
2710
2711         for (int i = 0; i < adapter->num_queues; i++) {
2712                 que = &adapter->queues[i];
2713                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2714         }
2715
2716         return;
2717 }
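
/*
 * Editor's note (layout recovered from the code above): each 32-bit IVAR
 * register holds four 8-bit vector entries, bit 7 of each being
 * E1000_IVAR_VALID.  On 82580/i350-class MACs, IVAR[i >> 1] carries, from
 * low byte to high: RX(even queue), TX(even), RX(odd), TX(odd).  On the
 * 82576, IVAR[i & 7] carries RX(i), TX(i), RX(i + 8), TX(i + 8).  The 82575
 * uses per-vector MSIXBM bitmaps instead of IVAR indirection.
 */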
2718
2719
2720 static void
2721 igb_free_pci_resources(struct adapter *adapter)
2722 {
2723         struct          igb_queue *que = adapter->queues;
2724         device_t        dev = adapter->dev;
2725         int             rid;
2726
2727         /*
2728         ** There is a slight possibility of a failure mode
2729         ** in attach that will result in entering this function
2730         ** before interrupt resources have been initialized, and
2731         ** in that case we do not want to execute the loops below.
2732         ** We can detect this reliably by the state of the adapter's
2733         ** res pointer.
2734         */
2735         if (adapter->res == NULL)
2736                 goto mem;
2737
2738         /*
2739          * First release all the interrupt resources:
2740          */
2741         for (int i = 0; i < adapter->num_queues; i++, que++) {
2742                 rid = que->msix + 1;
2743                 if (que->tag != NULL) {
2744                         bus_teardown_intr(dev, que->res, que->tag);
2745                         que->tag = NULL;
2746                 }
2747                 if (que->res != NULL)
2748                         bus_release_resource(dev,
2749                             SYS_RES_IRQ, rid, que->res);
2750         }
2751
2752         /* Clean the Legacy or Link interrupt last */
2753         if (adapter->linkvec) /* we are doing MSIX */
2754                 rid = adapter->linkvec + 1;
2755         else
2756                 rid = (adapter->msix != 0) ? 1 : 0;
2757
2758         que = adapter->queues;
2759         if (adapter->tag != NULL) {
2760                 taskqueue_drain(que->tq, &adapter->link_task);
2761                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2762                 adapter->tag = NULL;
2763         }
2764         if (adapter->res != NULL)
2765                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2766
2767         for (int i = 0; i < adapter->num_queues; i++, que++) {
2768                 if (que->tq != NULL) {
2769 #if __FreeBSD_version >= 800000
2770                         taskqueue_drain(que->tq, &que->txr->txq_task);
2771 #endif
2772                         taskqueue_drain(que->tq, &que->que_task);
2773                         taskqueue_free(que->tq);
2774                 }
2775         }
2776 mem:
2777         if (adapter->msix)
2778                 pci_release_msi(dev);
2779
2780         if (adapter->msix_mem != NULL)
2781                 bus_release_resource(dev, SYS_RES_MEMORY,
2782                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2783
2784         if (adapter->pci_mem != NULL)
2785                 bus_release_resource(dev, SYS_RES_MEMORY,
2786                     PCIR_BAR(0), adapter->pci_mem);
2787
2788 }
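
/*
 * For reference, the SYS_RES_IRQ resource ids (rids) released above map
 * to interrupt sources as follows; FreeBSD numbers MSI/MSI-X rids from 1,
 * while rid 0 is the legacy INTx line:
 *
 *     MSI-X queue vector n  ->  rid n + 1          (que->msix + 1)
 *     MSI-X link vector     ->  rid linkvec + 1
 *     plain MSI             ->  rid 1
 *     legacy INTx           ->  rid 0
 */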
2789
2790 /*
2791  * Set up either MSI-X or MSI
2792  */
2793 static int
2794 igb_setup_msix(struct adapter *adapter)
2795 {
2796         device_t dev = adapter->dev;
2797         int rid, want, queues, msgs, maxqueues;
2798
2799         /* tuneable override */
2800         if (igb_enable_msix == 0)
2801                 goto msi;
2802
2803         /* First try MSI/X */
2804         rid = PCIR_BAR(IGB_MSIX_BAR);
2805         adapter->msix_mem = bus_alloc_resource_any(dev,
2806             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2807         if (!adapter->msix_mem) {
2808                 /* May not be enabled */
2809                 device_printf(adapter->dev,
2810                     "Unable to map MSIX table\n");
2811                 goto msi;
2812         }
2813
2814         msgs = pci_msix_count(dev); 
2815         if (msgs == 0) { /* system has msix disabled */
2816                 bus_release_resource(dev, SYS_RES_MEMORY,
2817                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2818                 adapter->msix_mem = NULL;
2819                 goto msi;
2820         }
2821
2822         /* Figure out a reasonable auto config value */
2823         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2824
2825         /* Manual override */
2826         if (igb_num_queues != 0)
2827                 queues = igb_num_queues;
2828
2829         /* Sanity check based on HW */
2830         switch (adapter->hw.mac.type) {
2831                 case e1000_82575:
2832                         maxqueues = 4;
2833                         break;
2834                 case e1000_82576:
2835                 case e1000_82580:
2836                 case e1000_i350:
2837                         maxqueues = 8;
2838                         break;
2839                 case e1000_i210:
2840                         maxqueues = 4;
2841                         break;
2842                 case e1000_i211:
2843                         maxqueues = 2;
2844                         break;
2845                 default:  /* VF interfaces */
2846                         maxqueues = 1;
2847                         break;
2848         }
2849         if (queues > maxqueues)
2850                 queues = maxqueues;
2851
2852         /*
2853         ** One vector (RX/TX pair) per queue,
2854         ** plus an additional one for the link interrupt
2855         */
2856         want = queues + 1;
2857         if (msgs >= want)
2858                 msgs = want;
2859         else {
2860                 device_printf(adapter->dev,
2861                     "MSIX Configuration Problem, "
2862                     "%d vectors configured, but %d queues wanted!\n",
2863                     msgs, want);
2864                 return (0);
2865         }
2866         if (pci_alloc_msix(dev, &msgs) == 0) {
2867                 device_printf(adapter->dev,
2868                     "Using MSIX interrupts with %d vectors\n", msgs);
2869                 adapter->num_queues = queues;
2870                 return (msgs);
2871         }
2872 msi:
2873         msgs = pci_msi_count(dev);
2874         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2875                 device_printf(adapter->dev, "Using MSI interrupt\n");
2876                 return (msgs);
2877         }
2878         return (0);
2879 }
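
/*
 * A worked sizing example for the logic above (numbers are illustrative):
 * on a 4-CPU machine with an 82576 advertising 10 MSI-X messages,
 *
 *     queues = min(mp_ncpus, msgs - 1) = min(4, 9) = 4;  -- <= maxqueues 8
 *     want   = queues + 1 = 5;                           -- + link vector
 *
 * so pci_alloc_msix() is asked for 5 vectors: one RX/TX pair per queue,
 * plus the link/other vector that igb_configure_queues() programs
 * separately.
 */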
2880
2881 /*********************************************************************
2882  *
2883  *  Set up a fresh starting state
2884  *
2885  **********************************************************************/
2886 static void
2887 igb_reset(struct adapter *adapter)
2888 {
2889         device_t        dev = adapter->dev;
2890         struct e1000_hw *hw = &adapter->hw;
2891         struct e1000_fc_info *fc = &hw->fc;
2892         struct ifnet    *ifp = adapter->ifp;
2893         u32             pba = 0;
2894         u16             hwm;
2895
2896         INIT_DEBUGOUT("igb_reset: begin");
2897
2898         /* Let the firmware know the OS is in control */
2899         igb_get_hw_control(adapter);
2900
2901         /*
2902          * Packet Buffer Allocation (PBA)
2903          * Writing PBA sets the receive portion of the buffer;
2904          * the remainder is used for the transmit buffer.
2905          */
2906         switch (hw->mac.type) {
2907         case e1000_82575:
2908                 pba = E1000_PBA_32K;
2909                 break;
2910         case e1000_82576:
2911         case e1000_vfadapt:
2912                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2913                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2914                 break;
2915         case e1000_82580:
2916         case e1000_i350:
2917         case e1000_vfadapt_i350:
2918                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2919                 pba = e1000_rxpbs_adjust_82580(pba);
2920                 break;
2921         case e1000_i210:
2922         case e1000_i211:
2923                 pba = E1000_PBA_34K;
2924         default:
2925                 break;
2926         }
2927
2928         /* Special needs in case of Jumbo frames */
2929         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2930                 u32 tx_space, min_tx, min_rx;
2931                 pba = E1000_READ_REG(hw, E1000_PBA);
2932                 tx_space = pba >> 16;
2933                 pba &= 0xffff;
2934                 min_tx = (adapter->max_frame_size +
2935                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2936                 min_tx = roundup2(min_tx, 1024);
2937                 min_tx >>= 10;
2938                 min_rx = adapter->max_frame_size;
2939                 min_rx = roundup2(min_rx, 1024);
2940                 min_rx >>= 10;
2941                 if (tx_space < min_tx &&
2942                     ((min_tx - tx_space) < pba)) {
2943                         pba = pba - (min_tx - tx_space);
2944                         /*
2945                          * if short on rx space, rx wins
2946                          * and must trump tx adjustment
2947                          */
2948                         if (pba < min_rx)
2949                                 pba = min_rx;
2950                 }
2951                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2952         }
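
        /*
         * Example of the rebalance above (a sketch with assumed sizes):
         * for a 9000-byte MTU, max_frame_size is roughly 9018 bytes, so
         *
         *     min_tx = roundup2((9018 + 16 - 4) * 2, 1024) >> 10 = 18 (KB)
         *     min_rx = roundup2(9018, 1024) >> 10                =  9 (KB)
         *
         * i.e. TX needs room for two full frames (plus a descriptor, less
         * FCS) and RX for at least one, both expressed in the 1KB units
         * that the PBA register uses.
         */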
2953
2954         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2955
2956         /*
2957          * These parameters control the automatic generation (Tx) and
2958          * response (Rx) to Ethernet PAUSE frames.
2959          * - High water mark should allow for at least two frames to be
2960          *   received after sending an XOFF.
2961          * - Low water mark works best when it is very near the high water mark.
2962          *   This allows the receiver to restart by sending XON when it has
2963          *   drained a bit.
2964          */
2965         hwm = min(((pba << 10) * 9 / 10),
2966             ((pba << 10) - 2 * adapter->max_frame_size));
2967
2968         if (hw->mac.type < e1000_82576) {
2969                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2970                 fc->low_water = fc->high_water - 8;
2971         } else {
2972                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2973                 fc->low_water = fc->high_water - 16;
2974         }
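
        /*
         * Worked example (assumed values): an 82575 with pba = 32 (KB)
         * and max_frame_size = 1522 gives
         *
         *     hwm = min(32768 * 9 / 10, 32768 - 2 * 1522)
         *         = min(29491, 29724) = 29491
         *
         * so high_water = 29491 & 0xFFF8 = 29488 bytes and low_water =
         * 29480, leaving room for roughly two more frames above the high
         * mark before the receive FIFO fills.
         */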
2975
2976         fc->pause_time = IGB_FC_PAUSE_TIME;
2977         fc->send_xon = TRUE;
2978         if (adapter->fc)
2979                 fc->requested_mode = adapter->fc;
2980         else
2981                 fc->requested_mode = e1000_fc_default;
2982
2983         /* Issue a global reset */
2984         e1000_reset_hw(hw);
2985         E1000_WRITE_REG(hw, E1000_WUC, 0);
2986
2987         if (e1000_init_hw(hw) < 0)
2988                 device_printf(dev, "Hardware Initialization Failed\n");
2989
2990         /* Setup DMA Coalescing */
2991         if ((hw->mac.type > e1000_82580) &&
2992             (hw->mac.type != e1000_i211)) {
2993                 u32 dmac;
2994                 u32 reg = ~E1000_DMACR_DMAC_EN;
2995
2996                 if (adapter->dmac == 0) { /* Disabling it */
2997                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2998                         goto reset_out;
2999                 }
3000
3001                 /* Set starting thresholds */
3002                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3003                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3004
3005                 hwm = 64 * pba - adapter->max_frame_size / 16;
3006                 if (hwm < 64 * (pba - 6))
3007                         hwm = 64 * (pba - 6);
3008                 reg = E1000_READ_REG(hw, E1000_FCRTC);
3009                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3010                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3011                     & E1000_FCRTC_RTH_COAL_MASK);
3012                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3013
3014
3015                 dmac = pba - adapter->max_frame_size / 512;
3016                 if (dmac < pba - 10)
3017                         dmac = pba - 10;
3018                 reg = E1000_READ_REG(hw, E1000_DMACR);
3019                 reg &= ~E1000_DMACR_DMACTHR_MASK;
3020                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3021                     & E1000_DMACR_DMACTHR_MASK);
3022                 /* transition to L0x or L1 if available..*/
3023                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3024                 /* timer = value in adapter->dmac in 32usec intervals */
3025                 reg |= (adapter->dmac >> 5);
3026                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3027
3028                 /* Set the interval before transition */
3029                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3030                 reg |= 0x80000004;
3031                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3032
3033                 /* free space in tx packet buffer to wake from DMA coal */
3034                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
3035                     (20480 - (2 * adapter->max_frame_size)) >> 6);
3036
3037                 /* make low power state decision controlled by DMA coal */
3038                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3039                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3040                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3041                 device_printf(dev, "DMA Coalescing enabled\n");
3042
3043         } else if (hw->mac.type == e1000_82580) {
3044                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3045                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3046                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3047                     reg & ~E1000_PCIEMISC_LX_DECISION);
3048         }
3049
3050 reset_out:
3051         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3052         e1000_get_phy_info(hw);
3053         e1000_check_for_link(hw);
3054         return;
3055 }
3056
3057 /*********************************************************************
3058  *
3059  *  Setup networking device structure and register an interface.
3060  *
3061  **********************************************************************/
3062 static int
3063 igb_setup_interface(device_t dev, struct adapter *adapter)
3064 {
3065         struct ifnet   *ifp;
3066
3067         INIT_DEBUGOUT("igb_setup_interface: begin");
3068
3069         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3070         if (ifp == NULL) {
3071                 device_printf(dev, "cannot allocate ifnet structure\n");
3072                 return (-1);
3073         }
3074         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3075         ifp->if_init =  igb_init;
3076         ifp->if_softc = adapter;
3077         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3078         ifp->if_ioctl = igb_ioctl;
3079 #if __FreeBSD_version >= 800000
3080         ifp->if_transmit = igb_mq_start;
3081         ifp->if_qflush = igb_qflush;
3082 #else
3083         ifp->if_start = igb_start;
3084         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3085         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3086         IFQ_SET_READY(&ifp->if_snd);
3087 #endif
3088
3089         ether_ifattach(ifp, adapter->hw.mac.addr);
3090
3091         ifp->if_capabilities = ifp->if_capenable = 0;
3092
3093         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3094         ifp->if_capabilities |= IFCAP_TSO4;
3095         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3096         ifp->if_capenable = ifp->if_capabilities;
3097
3098         /* Advertise LRO capability, but leave it disabled by default */
3099         ifp->if_capabilities |= IFCAP_LRO;
3100
3101 #ifdef DEVICE_POLLING
3102         ifp->if_capabilities |= IFCAP_POLLING;
3103 #endif
3104
3105         /*
3106          * Tell the upper layer(s) we
3107          * support full VLAN capability.
3108          */
3109         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3110         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3111                              |  IFCAP_VLAN_HWTSO
3112                              |  IFCAP_VLAN_MTU;
3113         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3114                           |  IFCAP_VLAN_HWTSO
3115                           |  IFCAP_VLAN_MTU;
3116
3117         /*
3118         ** Don't turn this on by default: if vlans are
3119         ** created on another pseudo device (e.g. lagg)
3120         ** then vlan events are not passed through, breaking
3121         ** operation, though with HW FILTER off it works. If
3122         ** you use vlans directly on the igb driver you can
3123         ** enable this and get full hardware tag filtering.
3124         */
3125         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3126
3127         /*
3128          * Specify the media types supported by this adapter and register
3129          * callbacks to update media and link information
3130          */
3131         ifmedia_init(&adapter->media, IFM_IMASK,
3132             igb_media_change, igb_media_status);
3133         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3134             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3135                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3136                             0, NULL);
3137                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3138         } else {
3139                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3140                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3141                             0, NULL);
3142                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3143                             0, NULL);
3144                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3145                             0, NULL);
3146                 if (adapter->hw.phy.type != e1000_phy_ife) {
3147                         ifmedia_add(&adapter->media,
3148                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3149                         ifmedia_add(&adapter->media,
3150                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3151                 }
3152         }
3153         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3154         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3155         return (0);
3156 }
3157
3158
3159 /*
3160  * Manage DMA'able memory.
3161  */
3162 static void
3163 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3164 {
3165         if (error)
3166                 return;
3167         *(bus_addr_t *) arg = segs[0].ds_addr;
3168 }
3169
3170 static int
3171 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3172         struct igb_dma_alloc *dma, int mapflags)
3173 {
3174         int error;
3175
3176         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3177                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3178                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3179                                 BUS_SPACE_MAXADDR,      /* highaddr */
3180                                 NULL, NULL,             /* filter, filterarg */
3181                                 size,                   /* maxsize */
3182                                 1,                      /* nsegments */
3183                                 size,                   /* maxsegsize */
3184                                 0,                      /* flags */
3185                                 NULL,                   /* lockfunc */
3186                                 NULL,                   /* lockarg */
3187                                 &dma->dma_tag);
3188         if (error) {
3189                 device_printf(adapter->dev,
3190                     "%s: bus_dma_tag_create failed: %d\n",
3191                     __func__, error);
3192                 goto fail_0;
3193         }
3194
3195         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3196             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3197         if (error) {
3198                 device_printf(adapter->dev,
3199                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3200                     __func__, (uintmax_t)size, error);
3201                 goto fail_1;
3202         }
3203
3204         dma->dma_paddr = 0;
3205         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3206             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3207         if (error || dma->dma_paddr == 0) {
3208                 device_printf(adapter->dev,
3209                     "%s: bus_dmamap_load failed: %d\n",
3210                     __func__, error);
3211                 goto fail_3;
3212         }
3213
3214         return (0);
3215
3216 fail_3:
3217         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3218 fail_2:
3219         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
3220         bus_dma_tag_destroy(dma->dma_tag);
3221 fail_0:
3222         dma->dma_map = NULL;
3223         dma->dma_tag = NULL;
3224
3225         return (error);
3226 }
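
/*
 * Typical usage of the pair above, as seen in igb_allocate_queues():
 * allocate a descriptor area, and on any later failure release it with
 * igb_dma_free(), which tolerates partially constructed state:
 *
 *     if (igb_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) {
 *             error = ENOMEM;
 *             goto err_tx_desc;      -- unwound via igb_dma_free()
 *     }
 */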
3227
3228 static void
3229 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3230 {
3231         if (dma->dma_tag == NULL)
3232                 return;
3233         if (dma->dma_map != NULL) {
3234                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3235                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3236                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3237                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3238                 dma->dma_map = NULL;
3239         }
3240         bus_dma_tag_destroy(dma->dma_tag);
3241         dma->dma_tag = NULL;
3242 }
3243
3244
3245 /*********************************************************************
3246  *
3247  *  Allocate memory for the transmit and receive rings, and then
3248  *  the descriptors associated with each, called only once at attach.
3249  *
3250  **********************************************************************/
3251 static int
3252 igb_allocate_queues(struct adapter *adapter)
3253 {
3254         device_t dev = adapter->dev;
3255         struct igb_queue        *que = NULL;
3256         struct tx_ring          *txr = NULL;
3257         struct rx_ring          *rxr = NULL;
3258         int rsize, tsize, error = E1000_SUCCESS;
3259         int txconf = 0, rxconf = 0;
3260
3261         /* First allocate the top level queue structs */
3262         if (!(adapter->queues =
3263             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3264             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3265                 device_printf(dev, "Unable to allocate queue memory\n");
3266                 error = ENOMEM;
3267                 goto fail;
3268         }
3269
3270         /* Next allocate the TX ring struct memory */
3271         if (!(adapter->tx_rings =
3272             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3273             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3274                 device_printf(dev, "Unable to allocate TX ring memory\n");
3275                 error = ENOMEM;
3276                 goto tx_fail;
3277         }
3278
3279         /* Now allocate the RX */
3280         if (!(adapter->rx_rings =
3281             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3282             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3283                 device_printf(dev, "Unable to allocate RX ring memory\n");
3284                 error = ENOMEM;
3285                 goto rx_fail;
3286         }
3287
3288         tsize = roundup2(adapter->num_tx_desc *
3289             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3290         /*
3291          * Now set up the TX queues, txconf is needed to handle the
3292          * possibility that things fail midcourse and we need to
3293          * undo memory gracefully
3294          */ 
3295         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3296                 /* Set up some basics */
3297                 txr = &adapter->tx_rings[i];
3298                 txr->adapter = adapter;
3299                 txr->me = i;
3300
3301                 /* Initialize the TX lock */
3302                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3303                     device_get_nameunit(dev), txr->me);
3304                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3305
3306                 if (igb_dma_malloc(adapter, tsize,
3307                         &txr->txdma, BUS_DMA_NOWAIT)) {
3308                         device_printf(dev,
3309                             "Unable to allocate TX Descriptor memory\n");
3310                         error = ENOMEM;
3311                         goto err_tx_desc;
3312                 }
3313                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3314                 bzero((void *)txr->tx_base, tsize);
3315
3316                 /* Now allocate transmit buffers for the ring */
3317                 if (igb_allocate_transmit_buffers(txr)) {
3318                         device_printf(dev,
3319                             "Critical Failure setting up transmit buffers\n");
3320                         error = ENOMEM;
3321                         goto err_tx_desc;
3322                 }
3323 #if __FreeBSD_version >= 800000
3324                 /* Allocate a buf ring */
3325                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3326                     M_WAITOK, &txr->tx_mtx);
3327 #endif
3328         }
3329
3330         /*
3331          * Next the RX queues...
3332          */ 
3333         rsize = roundup2(adapter->num_rx_desc *
3334             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3335         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3336                 rxr = &adapter->rx_rings[i];
3337                 rxr->adapter = adapter;
3338                 rxr->me = i;
3339
3340                 /* Initialize the RX lock */
3341                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3342                     device_get_nameunit(dev), rxr->me);
3343                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3344
3345                 if (igb_dma_malloc(adapter, rsize,
3346                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3347                         device_printf(dev,
3348                             "Unable to allocate RX Descriptor memory\n");
3349                         error = ENOMEM;
3350                         goto err_rx_desc;
3351                 }
3352                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3353                 bzero((void *)rxr->rx_base, rsize);
3354
3355                 /* Allocate receive buffers for the ring */
3356                 if (igb_allocate_receive_buffers(rxr)) {
3357                         device_printf(dev,
3358                             "Critical Failure setting up receive buffers\n");
3359                         error = ENOMEM;
3360                         goto err_rx_desc;
3361                 }
3362         }
3363
3364         /*
3365         ** Finally set up the queue holding structs
3366         */
3367         for (int i = 0; i < adapter->num_queues; i++) {
3368                 que = &adapter->queues[i];
3369                 que->adapter = adapter;
3370                 que->txr = &adapter->tx_rings[i];
3371                 que->rxr = &adapter->rx_rings[i];
3372         }
3373
3374         return (0);
3375
3376 err_rx_desc:
3377         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3378                 igb_dma_free(adapter, &rxr->rxdma);
3379 err_tx_desc:
3380         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3381                 igb_dma_free(adapter, &txr->txdma);
3382         free(adapter->rx_rings, M_DEVBUF);
3383 rx_fail:
3384 #if __FreeBSD_version >= 800000
3385         if (txr->br != NULL)    /* may be NULL on a partial failure */
                buf_ring_free(txr->br, M_DEVBUF);
3386 #endif
3387         free(adapter->tx_rings, M_DEVBUF);
3388 tx_fail:
3389         free(adapter->queues, M_DEVBUF);
3390 fail:
3391         return (error);
3392 }
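
/*
 * The txconf/rxconf counters above implement a simple unwind protocol:
 * each counts fully constructed rings, so if, say, the third of four TX
 * rings fails its DMA allocation, txconf == 2 and the err_tx_desc loop
 * releases exactly the two descriptor areas that were set up, leaving
 * the failing ring's own allocation path to clean up after itself.
 */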
3393
3394 /*********************************************************************
3395  *
3396  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3397  *  the information needed to transmit a packet on the wire. This is
3398  *  called only once at attach, setup is done every reset.
3399  *
3400  **********************************************************************/
3401 static int
3402 igb_allocate_transmit_buffers(struct tx_ring *txr)
3403 {
3404         struct adapter *adapter = txr->adapter;
3405         device_t dev = adapter->dev;
3406         struct igb_tx_buffer *txbuf;
3407         int error, i;
3408
3409         /*
3410          * Setup DMA descriptor areas.
3411          */
3412         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3413                                1, 0,                    /* alignment, bounds */
3414                                BUS_SPACE_MAXADDR,       /* lowaddr */
3415                                BUS_SPACE_MAXADDR,       /* highaddr */
3416                                NULL, NULL,              /* filter, filterarg */
3417                                IGB_TSO_SIZE,            /* maxsize */
3418                                IGB_MAX_SCATTER,         /* nsegments */
3419                                PAGE_SIZE,               /* maxsegsize */
3420                                0,                       /* flags */
3421                                NULL,                    /* lockfunc */
3422                                NULL,                    /* lockfuncarg */
3423                                &txr->txtag))) {
3424                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3425                 goto fail;
3426         }
3427
3428         if (!(txr->tx_buffers =
3429             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3430             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3431                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3432                 error = ENOMEM;
3433                 goto fail;
3434         }
3435
3436         /* Create the descriptor buffer dma maps */
3437         txbuf = txr->tx_buffers;
3438         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3439                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3440                 if (error != 0) {
3441                         device_printf(dev, "Unable to create TX DMA map\n");
3442                         goto fail;
3443                 }
3444         }
3445
3446         return (0);
3447 fail:
3448         /* We free everything; this handles the case where we failed partway through */
3449         igb_free_transmit_structures(adapter);
3450         return (error);
3451 }
3452
3453 /*********************************************************************
3454  *
3455  *  Initialize a transmit ring.
3456  *
3457  **********************************************************************/
3458 static void
3459 igb_setup_transmit_ring(struct tx_ring *txr)
3460 {
3461         struct adapter *adapter = txr->adapter;
3462         struct igb_tx_buffer *txbuf;
3463         int i;
3464 #ifdef DEV_NETMAP
3465         struct netmap_adapter *na = NA(adapter->ifp);
3466         struct netmap_slot *slot;
3467 #endif /* DEV_NETMAP */
3468
3469         /* Clear the old descriptor contents */
3470         IGB_TX_LOCK(txr);
3471 #ifdef DEV_NETMAP
3472         slot = netmap_reset(na, NR_TX, txr->me, 0);
3473 #endif /* DEV_NETMAP */
3474         bzero((void *)txr->tx_base,
3475               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3476         /* Reset indices */
3477         txr->next_avail_desc = 0;
3478         txr->next_to_clean = 0;
3479
3480         /* Free any existing tx buffers. */
3481         txbuf = txr->tx_buffers;
3482         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3483                 if (txbuf->m_head != NULL) {
3484                         bus_dmamap_sync(txr->txtag, txbuf->map,
3485                             BUS_DMASYNC_POSTWRITE);
3486                         bus_dmamap_unload(txr->txtag, txbuf->map);
3487                         m_freem(txbuf->m_head);
3488                         txbuf->m_head = NULL;
3489                 }
3490 #ifdef DEV_NETMAP
3491                 if (slot) {
3492                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3493                         /* no need to set the address */
3494                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3495                 }
3496 #endif /* DEV_NETMAP */
3497                 /* clear the watch index */
3498                 txbuf->next_eop = -1;
3499         }
3500
3501         /* Set number of descriptors available */
3502         txr->tx_avail = adapter->num_tx_desc;
3503
3504         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3505             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3506         IGB_TX_UNLOCK(txr);
3507 }
3508
3509 /*********************************************************************
3510  *
3511  *  Initialize all transmit rings.
3512  *
3513  **********************************************************************/
3514 static void
3515 igb_setup_transmit_structures(struct adapter *adapter)
3516 {
3517         struct tx_ring *txr = adapter->tx_rings;
3518
3519         for (int i = 0; i < adapter->num_queues; i++, txr++)
3520                 igb_setup_transmit_ring(txr);
3521
3522         return;
3523 }
3524
3525 /*********************************************************************
3526  *
3527  *  Enable transmit unit.
3528  *
3529  **********************************************************************/
3530 static void
3531 igb_initialize_transmit_units(struct adapter *adapter)
3532 {
3533         struct tx_ring  *txr = adapter->tx_rings;
3534         struct e1000_hw *hw = &adapter->hw;
3535         u32             tctl, txdctl;
3536
3537         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3538         tctl = txdctl = 0;
3539
3540         /* Setup the Tx Descriptor Rings */
3541         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3542                 u64 bus_addr = txr->txdma.dma_paddr;
3543
3544                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3545                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3546                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3547                     (uint32_t)(bus_addr >> 32));
3548                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3549                     (uint32_t)bus_addr);
3550
3551                 /* Setup the HW Tx Head and Tail descriptor pointers */
3552                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3553                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3554
3555                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3556                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3557                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3558
3559                 txr->queue_status = IGB_QUEUE_IDLE;
3560
3561                 txdctl |= IGB_TX_PTHRESH;
3562                 txdctl |= IGB_TX_HTHRESH << 8;
3563                 txdctl |= IGB_TX_WTHRESH << 16;
3564                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3565                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3566         }
3567
3568         if (adapter->vf_ifp)
3569                 return;
3570
3571         e1000_config_collision_dist(hw);
3572
3573         /* Program the Transmit Control Register */
3574         tctl = E1000_READ_REG(hw, E1000_TCTL);
3575         tctl &= ~E1000_TCTL_CT;
3576         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3577                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3578
3579         /* This write will effectively turn on the transmit unit. */
3580         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3581 }
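
/*
 * The TXDCTL value written above packs three thresholds plus the enable
 * bit, per the shifts used in the code:
 *
 *     bits  [5:0]  PTHRESH - prefetch threshold    (IGB_TX_PTHRESH)
 *     bits [13:8]  HTHRESH - host threshold        (IGB_TX_HTHRESH << 8)
 *     bits [21:16] WTHRESH - write-back threshold  (IGB_TX_WTHRESH << 16)
 *     bit   25     E1000_TXDCTL_QUEUE_ENABLE
 *
 * The field widths beyond the shifts visible here follow the usual 82575
 * datasheet layout and should be read as a reference sketch rather than
 * a normative description.
 */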
3582
3583 /*********************************************************************
3584  *
3585  *  Free all transmit rings.
3586  *
3587  **********************************************************************/
3588 static void
3589 igb_free_transmit_structures(struct adapter *adapter)
3590 {
3591         struct tx_ring *txr = adapter->tx_rings;
3592
3593         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3594                 IGB_TX_LOCK(txr);
3595                 igb_free_transmit_buffers(txr);
3596                 igb_dma_free(adapter, &txr->txdma);
3597                 IGB_TX_UNLOCK(txr);
3598                 IGB_TX_LOCK_DESTROY(txr);
3599         }
3600         free(adapter->tx_rings, M_DEVBUF);
3601 }
3602
3603 /*********************************************************************
3604  *
3605  *  Free transmit ring related data structures.
3606  *
3607  **********************************************************************/
3608 static void
3609 igb_free_transmit_buffers(struct tx_ring *txr)
3610 {
3611         struct adapter *adapter = txr->adapter;
3612         struct igb_tx_buffer *tx_buffer;
3613         int             i;
3614
3615         INIT_DEBUGOUT("free_transmit_ring: begin");
3616
3617         if (txr->tx_buffers == NULL)
3618                 return;
3619
3620         tx_buffer = txr->tx_buffers;
3621         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3622                 if (tx_buffer->m_head != NULL) {
3623                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3624                             BUS_DMASYNC_POSTWRITE);
3625                         bus_dmamap_unload(txr->txtag,
3626                             tx_buffer->map);
3627                         m_freem(tx_buffer->m_head);
3628                         tx_buffer->m_head = NULL;
3629                         if (tx_buffer->map != NULL) {
3630                                 bus_dmamap_destroy(txr->txtag,
3631                                     tx_buffer->map);
3632                                 tx_buffer->map = NULL;
3633                         }
3634                 } else if (tx_buffer->map != NULL) {
3635                         bus_dmamap_unload(txr->txtag,
3636                             tx_buffer->map);
3637                         bus_dmamap_destroy(txr->txtag,
3638                             tx_buffer->map);
3639                         tx_buffer->map = NULL;
3640                 }
3641         }
3642 #if __FreeBSD_version >= 800000
3643         if (txr->br != NULL)
3644                 buf_ring_free(txr->br, M_DEVBUF);
3645 #endif
3646         if (txr->tx_buffers != NULL) {
3647                 free(txr->tx_buffers, M_DEVBUF);
3648                 txr->tx_buffers = NULL;
3649         }
3650         if (txr->txtag != NULL) {
3651                 bus_dma_tag_destroy(txr->txtag);
3652                 txr->txtag = NULL;
3653         }
3654         return;
3655 }
3656
3657 /**********************************************************************
3658  *
3659  *  Setup work for hardware segmentation offload (TSO)
3660  *
3661  **********************************************************************/
3662 static bool
3663 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3664         struct ip *ip, struct tcphdr *th)
3665 {
3666         struct adapter *adapter = txr->adapter;
3667         struct e1000_adv_tx_context_desc *TXD;
3668         struct igb_tx_buffer        *tx_buffer;
3669         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3670         u32 mss_l4len_idx = 0;
3671         u16 vtag = 0;
3672         int ctxd, ip_hlen, tcp_hlen;
3673
3674         ctxd = txr->next_avail_desc;
3675         tx_buffer = &txr->tx_buffers[ctxd];
3676         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3677
3678         ip->ip_sum = 0;
3679         ip_hlen = ip->ip_hl << 2;
3680         tcp_hlen = th->th_off << 2;
3681
3682         /* VLAN MACLEN IPLEN */
3683         if (mp->m_flags & M_VLANTAG) {
3684                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3685                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3686         }
3687
3688         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3689         vlan_macip_lens |= ip_hlen;
3690         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3691
3692         /* ADV DTYPE TUCMD */
3693         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3694         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3695         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3696         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3697
3698         /* MSS L4LEN IDX */
3699         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3700         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3701         /* 82575 needs the queue index added */
3702         if (adapter->hw.mac.type == e1000_82575)
3703                 mss_l4len_idx |= txr->me << 4;
3704         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3705
3706         TXD->seqnum_seed = htole32(0);
3707         tx_buffer->m_head = NULL;
3708         tx_buffer->next_eop = -1;
3709
3710         if (++ctxd == adapter->num_tx_desc)
3711                 ctxd = 0;
3712
3713         txr->tx_avail--;
3714         txr->next_avail_desc = ctxd;
3715         return TRUE;
3716 }
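
/*
 * Field-packing example for the context descriptor built above (a sketch
 * assuming the conventional shift values from the e1000 headers: MACLEN
 * shift 9, VLAN shift 16, L4LEN shift 8, MSS shift 16). For an untagged
 * TCP/IPv4 frame with a 14-byte Ethernet header, 20-byte IP header,
 * 20-byte TCP header and a 1448-byte MSS:
 *
 *     vlan_macip_lens = (14 << 9) | 20;            -- 0x1C14
 *     mss_l4len_idx   = (1448 << 16) | (20 << 8);
 *
 * plus the queue index in the low bits on the 82575, which keeps context
 * state separated per ring on that MAC.
 */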
3717
3718
3719 /*********************************************************************
3720  *
3721  *  Context Descriptor setup for VLAN or CSUM
3722  *
3723  **********************************************************************/
3724
3725 static bool
3726 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3727 {
3728         struct adapter *adapter = txr->adapter;
3729         struct e1000_adv_tx_context_desc *TXD;
3730         struct igb_tx_buffer        *tx_buffer;
3731         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3732         struct ether_vlan_header *eh;
3733         struct ip *ip = NULL;
3734         struct ip6_hdr *ip6;
3735         int  ehdrlen, ctxd, ip_hlen = 0;
3736         u16     etype, vtag = 0;
3737         u8      ipproto = 0;
3738         bool    offload = TRUE;
3739
3740         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3741                 offload = FALSE;
3742
3743         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3744         ctxd = txr->next_avail_desc;
3745         tx_buffer = &txr->tx_buffers[ctxd];
3746         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3747
3748         /*
3749         ** In advanced descriptors the vlan tag must 
3750         ** be placed into the context descriptor, thus
3751         ** we need to be here just for that setup.
3752         */
3753         if (mp->m_flags & M_VLANTAG) {
3754                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3755                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3756         } else if (offload == FALSE)
3757                 return FALSE;
3758
3759         /*
3760          * Determine where frame payload starts.
3761          * Jump over vlan headers if already present,
3762          * helpful for QinQ too.
3763          */
3764         eh = mtod(mp, struct ether_vlan_header *);
3765         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3766                 etype = ntohs(eh->evl_proto);
3767                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3768         } else {
3769                 etype = ntohs(eh->evl_encap_proto);
3770                 ehdrlen = ETHER_HDR_LEN;
3771         }
3772
3773         /* Set the ether header length */
3774         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3775
3776         switch (etype) {
3777                 case ETHERTYPE_IP:
3778                         ip = (struct ip *)(mp->m_data + ehdrlen);
3779                         ip_hlen = ip->ip_hl << 2;
3780                         if (mp->m_len < ehdrlen + ip_hlen) {
3781                                 offload = FALSE;
3782                                 break;
3783                         }
3784                         ipproto = ip->ip_p;
3785                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3786                         break;
3787                 case ETHERTYPE_IPV6:
3788                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3789                         ip_hlen = sizeof(struct ip6_hdr);
3790                         ipproto = ip6->ip6_nxt;
3791                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3792                         break;
3793                 default:
3794                         offload = FALSE;
3795                         break;
3796         }
3797
3798         vlan_macip_lens |= ip_hlen;
3799         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3800
3801         switch (ipproto) {
3802                 case IPPROTO_TCP:
3803                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3804                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3805                         break;
3806                 case IPPROTO_UDP:
3807                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3808                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3809                         break;
3810 #if __FreeBSD_version >= 800000
3811                 case IPPROTO_SCTP:
3812                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3813                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3814                         break;
3815 #endif
3816                 default:
3817                         offload = FALSE;
3818                         break;
3819         }
3820
3821         /* 82575 needs the queue index added */
3822         if (adapter->hw.mac.type == e1000_82575)
3823                 mss_l4len_idx = txr->me << 4;
3824
3825         /* Now copy bits into descriptor */
3826         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3827         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3828         TXD->seqnum_seed = htole32(0);
3829         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3830
3831         tx_buffer->m_head = NULL;
3832         tx_buffer->next_eop = -1;
3833
3834         /* We've consumed the first desc, adjust counters */
3835         if (++ctxd == adapter->num_tx_desc)
3836                 ctxd = 0;
3837         txr->next_avail_desc = ctxd;
3838         --txr->tx_avail;
3839
3840         return (offload);
3841 }
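
/*
 * Note on the header walk above: for a VLAN-tagged (or QinQ outer-tagged)
 * frame, the encapsulation adds ETHER_VLAN_ENCAP_LEN, so ehdrlen becomes
 * 14 + 4 = 18 and etype is taken from evl_proto instead of
 * evl_encap_proto; the MACLEN field then tells the hardware where the IP
 * header begins, which is what makes the checksum offload land on the
 * right bytes.
 */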
3842
3843
3844 /**********************************************************************
3845  *
3846  *  Examine each tx_buffer in the used queue. If the hardware is done
3847  *  processing the packet then free associated resources. The
3848  *  tx_buffer is put back on the free queue.
3849  *
3850  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3851  **********************************************************************/
3852 static bool
3853 igb_txeof(struct tx_ring *txr)
3854 {
3855         struct adapter  *adapter = txr->adapter;
3856         int first, last, done, processed;
3857         struct igb_tx_buffer *tx_buffer;
3858         struct e1000_tx_desc   *tx_desc, *eop_desc;
3859         struct ifnet   *ifp = adapter->ifp;
3860
3861         IGB_TX_LOCK_ASSERT(txr);
3862
3863 #ifdef DEV_NETMAP
3864         if (ifp->if_capenable & IFCAP_NETMAP) {
3865                 struct netmap_adapter *na = NA(ifp);
3866
3867                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3868                 IGB_TX_UNLOCK(txr);
3869                 IGB_CORE_LOCK(adapter);
3870                 selwakeuppri(&na->tx_si, PI_NET);
3871                 IGB_CORE_UNLOCK(adapter);
3872                 IGB_TX_LOCK(txr);
3873                 return FALSE;
3874         }
3875 #endif /* DEV_NETMAP */
3876         if (txr->tx_avail == adapter->num_tx_desc) {
3877                 txr->queue_status = IGB_QUEUE_IDLE;
3878                 return FALSE;
3879         }
3880
3881         processed = 0;
3882         first = txr->next_to_clean;
3883         tx_desc = &txr->tx_base[first];
3884         tx_buffer = &txr->tx_buffers[first];
3885         last = tx_buffer->next_eop;
3886         eop_desc = &txr->tx_base[last];
3887
3888         /*
3889          * Get the index of the first descriptor
3890          * AFTER the EOP of the first packet, so that
3891          * we can do the simple comparison in the
3892          * inner while loop.
3893          */
3894         if (++last == adapter->num_tx_desc)
3895                 last = 0;
3896         done = last;
3897
3898         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3899             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3900
3901         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3902                 /* We clean the range of the packet */
3903                 while (first != done) {
3904                         tx_desc->upper.data = 0;
3905                         tx_desc->lower.data = 0;
3906                         tx_desc->buffer_addr = 0;
3907                         ++txr->tx_avail;
3908                         ++processed;
3909
3910                         if (tx_buffer->m_head) {
3911                                 txr->bytes +=
3912                                     tx_buffer->m_head->m_pkthdr.len;
3913                                 bus_dmamap_sync(txr->txtag,
3914                                     tx_buffer->map,
3915                                     BUS_DMASYNC_POSTWRITE);
3916                                 bus_dmamap_unload(txr->txtag,
3917                                     tx_buffer->map);
3918
3919                                 m_freem(tx_buffer->m_head);
3920                                 tx_buffer->m_head = NULL;
3921                         }
3922                         tx_buffer->next_eop = -1;
3923                         txr->watchdog_time = ticks;
3924
3925                         if (++first == adapter->num_tx_desc)
3926                                 first = 0;
3927
3928                         tx_buffer = &txr->tx_buffers[first];
3929                         tx_desc = &txr->tx_base[first];
3930                 }
3931                 ++txr->packets;
3932                 ++ifp->if_opackets;
3933                 /* See if we can continue to the next packet */
3934                 last = tx_buffer->next_eop;
3935                 if (last != -1) {
3936                         eop_desc = &txr->tx_base[last];
3937                         /* Get new done point */
3938                         if (++last == adapter->num_tx_desc) last = 0;
3939                         done = last;
3940                 } else
3941                         break;
3942         }
3943         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3944             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3945
3946         txr->next_to_clean = first;
3947
3948         /*
3949         ** Watchdog calculation, we know there's
3950         ** work outstanding or the first return
3951         ** would have been taken, so none processed
3952         ** for too long indicates a hang.
3953         */
3954         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3955                 txr->queue_status |= IGB_QUEUE_HUNG;
3956         /*
3957          * If we have a minimum free,
3958          * clear depleted state bit
3959          */
3960         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)          
3961                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3962
3963         /* All clean, turn off the watchdog */
3964         if (txr->tx_avail == adapter->num_tx_desc) {
3965                 txr->queue_status = IGB_QUEUE_IDLE;
3966                 return (FALSE);
3967         }
3968
3969         return (TRUE);
3970 }
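
/*
 * Cleanup-walk example for the first/last/done bookkeeping above: in an
 * 8-descriptor ring where a packet occupies slots 2..4 with its EOP in
 * slot 4,
 *
 *     first = 2;  last = next_eop = 4;  done = 5;
 *
 * and once slot 4's status shows E1000_TXD_STAT_DD the inner loop
 * reclaims 2, 3 and 4, stopping when first reaches done. The next
 * packet's next_eop (if any) then restarts the outer loop.
 */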
3971
3972 /*********************************************************************
3973  *
3974  *  Refresh mbuf buffers for RX descriptor rings
3975  *   - now keeps its own state, so discards due to resource
3976  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3977  *     the routine just returns, keeping its placeholder, and can
3978  *     simply be recalled to try again.
3979  *
3980  **********************************************************************/
3981 static void
3982 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3983 {
3984         struct adapter          *adapter = rxr->adapter;
3985         bus_dma_segment_t       hseg[1];
3986         bus_dma_segment_t       pseg[1];
3987         struct igb_rx_buf       *rxbuf;
3988         struct mbuf             *mh, *mp;
3989         int                     i, j, nsegs, error;
3990         bool                    refreshed = FALSE;
3991
3992         i = j = rxr->next_to_refresh;
3993         /*
3994         ** Get one descriptor beyond
3995         ** our work mark to control
3996         ** the loop.
3997         */
3998         if (++j == adapter->num_rx_desc)
3999                 j = 0;
4000
4001         while (j != limit) {
4002                 rxbuf = &rxr->rx_buffers[i];
4003                 /* No header mbuf is used when header split is off */
4004                 if (rxr->hdr_split == FALSE)
4005                         goto no_split;
4006                 if (rxbuf->m_head == NULL) {
4007                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4008                         if (mh == NULL)
4009                                 goto update;
4010                 } else
4011                         mh = rxbuf->m_head;
4012
4013                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4015                 mh->m_flags |= M_PKTHDR;
4016                 /* Get the memory mapping */
4017                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4018                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4019                 if (error != 0) {
4020                         printf("Refresh mbufs: hdr dmamap load"
4021                             " failure - %d\n", error);
4022                         m_free(mh);
4023                         rxbuf->m_head = NULL;
4024                         goto update;
4025                 }
4026                 rxbuf->m_head = mh;
4027                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4028                     BUS_DMASYNC_PREREAD);
4029                 rxr->rx_base[i].read.hdr_addr =
4030                     htole64(hseg[0].ds_addr);
4031 no_split:
4032                 if (rxbuf->m_pack == NULL) {
4033                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4034                             M_PKTHDR, adapter->rx_mbuf_sz);
4035                         if (mp == NULL)
4036                                 goto update;
4037                 } else
4038                         mp = rxbuf->m_pack;
4039
4040                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4041                 /* Get the memory mapping */
4042                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4043                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4044                 if (error != 0) {
4045                         printf("Refresh mbufs: payload dmamap load"
4046                             " failure - %d\n", error);
4047                         m_free(mp);
4048                         rxbuf->m_pack = NULL;
4049                         goto update;
4050                 }
4051                 rxbuf->m_pack = mp;
4052                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4053                     BUS_DMASYNC_PREREAD);
4054                 rxr->rx_base[i].read.pkt_addr =
4055                     htole64(pseg[0].ds_addr);
4056                 refreshed = TRUE; /* I feel wefreshed :) */
4057
4058                 i = j; /* our next is precalculated */
4059                 rxr->next_to_refresh = i;
4060                 if (++j == adapter->num_rx_desc)
4061                         j = 0;
4062         }
4063 update:
4064         if (refreshed) /* update tail */
4065                 E1000_WRITE_REG(&adapter->hw,
4066                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4067         return;
4068 }
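
/*
 * Index-handling sketch for the refresh loop above: next_to_refresh
 * always trails by one slot, with j precomputed as the following index.
 * With num_rx_desc = 1024 and next_to_refresh = 1023, for instance, the
 * first iteration works on slot 1023 with j wrapped to 0, and the loop
 * stops when j reaches the caller's limit, so the ring is never filled
 * completely and the tail written to RDT never overtakes the hardware's
 * head pointer.
 */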
4069
4070
4071 /*********************************************************************
4072  *
4073  *  Allocate memory for rx_buffer structures. Since we use one
4074  *  rx_buffer per received packet, the maximum number of rx_buffer's
4075  *  that we'll need is equal to the number of receive descriptors
4076  *  that we've allocated.
4077  *
4078  **********************************************************************/
4079 static int
4080 igb_allocate_receive_buffers(struct rx_ring *rxr)
4081 {
4082         struct  adapter         *adapter = rxr->adapter;
4083         device_t                dev = adapter->dev;
4084         struct igb_rx_buf       *rxbuf;
4085         int                     i, bsize, error;
4086
4087         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4088         if (!(rxr->rx_buffers =
4089             (struct igb_rx_buf *) malloc(bsize,
4090             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4091                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4092                 error = ENOMEM;
4093                 goto fail;
4094         }
4095
4096         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4097                                    1, 0,                /* alignment, bounds */
4098                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4099                                    BUS_SPACE_MAXADDR,   /* highaddr */
4100                                    NULL, NULL,          /* filter, filterarg */
4101                                    MSIZE,               /* maxsize */
4102                                    1,                   /* nsegments */
4103                                    MSIZE,               /* maxsegsize */
4104                                    0,                   /* flags */
4105                                    NULL,                /* lockfunc */
4106                                    NULL,                /* lockfuncarg */
4107                                    &rxr->htag))) {
4108                 device_printf(dev, "Unable to create RX DMA tag\n");
4109                 goto fail;
4110         }
4111
4112         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4113                                    1, 0,                /* alignment, bounds */
4114                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4115                                    BUS_SPACE_MAXADDR,   /* highaddr */
4116                                    NULL, NULL,          /* filter, filterarg */
4117                                    MJUM9BYTES,          /* maxsize */
4118                                    1,                   /* nsegments */
4119                                    MJUM9BYTES,          /* maxsegsize */
4120                                    0,                   /* flags */
4121                                    NULL,                /* lockfunc */
4122                                    NULL,                /* lockfuncarg */
4123                                    &rxr->ptag))) {
4124                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4125                 goto fail;
4126         }
4127
4128         for (i = 0; i < adapter->num_rx_desc; i++) {
4129                 rxbuf = &rxr->rx_buffers[i];
4130                 error = bus_dmamap_create(rxr->htag,
4131                     BUS_DMA_NOWAIT, &rxbuf->hmap);
4132                 if (error) {
4133                         device_printf(dev,
4134                             "Unable to create RX head DMA maps\n");
4135                         goto fail;
4136                 }
4137                 error = bus_dmamap_create(rxr->ptag,
4138                     BUS_DMA_NOWAIT, &rxbuf->pmap);
4139                 if (error) {
4140                         device_printf(dev,
4141                             "Unable to create RX packet DMA maps\n");
4142                         goto fail;
4143                 }
4144         }
4145
4146         return (0);
4147
4148 fail:
4149         /* Frees all, but can handle partial completion */
4150         igb_free_receive_structures(adapter);
4151         return (error);
4152 }
4153
4154
4155 static void
4156 igb_free_receive_ring(struct rx_ring *rxr)
4157 {
4158         struct  adapter         *adapter = rxr->adapter;
4159         struct igb_rx_buf       *rxbuf;
4160
4162         for (int i = 0; i < adapter->num_rx_desc; i++) {
4163                 rxbuf = &rxr->rx_buffers[i];
4164                 if (rxbuf->m_head != NULL) {
4165                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4166                             BUS_DMASYNC_POSTREAD);
4167                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4168                         rxbuf->m_head->m_flags |= M_PKTHDR;
4169                         m_freem(rxbuf->m_head);
4170                 }
4171                 if (rxbuf->m_pack != NULL) {
4172                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4173                             BUS_DMASYNC_POSTREAD);
4174                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4175                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4176                         m_freem(rxbuf->m_pack);
4177                 }
4178                 rxbuf->m_head = NULL;
4179                 rxbuf->m_pack = NULL;
4180         }
4181 }
4182
4183
4184 /*********************************************************************
4185  *
4186  *  Initialize a receive ring and its buffers.
4187  *
4188  **********************************************************************/
4189 static int
4190 igb_setup_receive_ring(struct rx_ring *rxr)
4191 {
4192         struct  adapter         *adapter;
4193         struct  ifnet           *ifp;
4194         device_t                dev;
4195         struct igb_rx_buf       *rxbuf;
4196         bus_dma_segment_t       pseg[1], hseg[1];
4197         struct lro_ctrl         *lro = &rxr->lro;
4198         int                     rsize, nsegs, error = 0;
4199 #ifdef DEV_NETMAP
4200         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4201         struct netmap_slot *slot;
4202 #endif /* DEV_NETMAP */
4203
4204         adapter = rxr->adapter;
4205         dev = adapter->dev;
4206         ifp = adapter->ifp;
4207
4208         /* Clear the ring contents */
4209         IGB_RX_LOCK(rxr);
4210 #ifdef DEV_NETMAP
4211         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4212 #endif /* DEV_NETMAP */
4213         rsize = roundup2(adapter->num_rx_desc *
4214             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4215         bzero((void *)rxr->rx_base, rsize);
4216
4217         /*
4218         ** Free current RX buffer structures and their mbufs
4219         */
4220         igb_free_receive_ring(rxr);
4221
4222         /* Configure for header split? */
4223         if (igb_header_split)
4224                 rxr->hdr_split = TRUE;
4225
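        /*
        ** With header split each descriptor is given both a
        ** small header mbuf and a payload cluster; otherwise
        ** only the payload cluster is used.
        */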
4226         /* Now replenish the ring mbufs */
4227         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4228                 struct mbuf     *mh, *mp;
4229
4230                 rxbuf = &rxr->rx_buffers[j];
4231 #ifdef DEV_NETMAP
4232                 if (slot) {
4233                         /* slot sj is mapped to the j-th NIC-ring entry */
4234                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4235                         uint64_t paddr;
4236                         void *addr;
4237
4238                         addr = PNMB(slot + sj, &paddr);
4239                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4240                         /* Update descriptor */
4241                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4242                         continue;
4243                 }
4244 #endif /* DEV_NETMAP */
4245                 if (rxr->hdr_split == FALSE)
4246                         goto skip_head;
4247
4248                 /* First the header */
4249                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4250                 if (rxbuf->m_head == NULL) {
4251                         error = ENOBUFS;
4252                         goto fail;
4253                 }
4254                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4255                 mh = rxbuf->m_head;
4256                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4257                 mh->m_flags |= M_PKTHDR;
4258                 /* Get the memory mapping */
4259                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4260                     rxbuf->hmap, rxbuf->m_head, hseg,
4261                     &nsegs, BUS_DMA_NOWAIT);
4262                 if (error != 0) /* Nothing elegant to do here */
4263                         goto fail;
4264                 bus_dmamap_sync(rxr->htag,
4265                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4266                 /* Update descriptor */
4267                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4268
4269 skip_head:
4270                 /* Now the payload cluster */
4271                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4272                     M_PKTHDR, adapter->rx_mbuf_sz);
4273                 if (rxbuf->m_pack == NULL) {
4274                         error = ENOBUFS;
4275                         goto fail;
4276                 }
4277                 mp = rxbuf->m_pack;
4278                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4279                 /* Get the memory mapping */
4280                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4281                     rxbuf->pmap, mp, pseg,
4282                     &nsegs, BUS_DMA_NOWAIT);
4283                 if (error != 0)
4284                         goto fail;
4285                 bus_dmamap_sync(rxr->ptag,
4286                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4287                 /* Update descriptor */
4288                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4289         }
4290
4291         /* Setup our descriptor indices */
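        /*
        ** next_to_refresh is left one behind the ring end so
        ** the tail pointer (RDT) never overtakes the hardware
        ** head (RDH).
        */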
4292         rxr->next_to_check = 0;
4293         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4294         rxr->lro_enabled = FALSE;
4295         rxr->rx_split_packets = 0;
4296         rxr->rx_bytes = 0;
4297
4298         rxr->fmp = NULL;
4299         rxr->lmp = NULL;
4300         rxr->discard = FALSE;
4301
4302         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4303             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4304
4305         /*
4306         ** Now set up the LRO interface; we also only do
4307         ** header split when LRO is enabled, since on its
4308         ** own header split is often undesirable in
4309         ** similar setups.
4310         */
4311         if (ifp->if_capenable & IFCAP_LRO) {
4312                 error = tcp_lro_init(lro);
4313                 if (error) {
4314                         device_printf(dev, "LRO Initialization failed!\n");
4315                         goto fail;
4316                 }
4317                 INIT_DEBUGOUT("RX LRO Initialized\n");
4318                 rxr->lro_enabled = TRUE;
4319                 lro->ifp = adapter->ifp;
4320         }
4321
4322         IGB_RX_UNLOCK(rxr);
4323         return (0);
4324
4325 fail:
4326         igb_free_receive_ring(rxr);
4327         IGB_RX_UNLOCK(rxr);
4328         return (error);
4329 }
4330
4331
4332 /*********************************************************************
4333  *
4334  *  Initialize all receive rings.
4335  *
4336  **********************************************************************/
4337 static int
4338 igb_setup_receive_structures(struct adapter *adapter)
4339 {
4340         struct rx_ring *rxr = adapter->rx_rings;
4341         int i;
4342
4343         for (i = 0; i < adapter->num_queues; i++, rxr++)
4344                 if (igb_setup_receive_ring(rxr))
4345                         goto fail;
4346
4347         return (0);
4348 fail:
4349         /*
4350          * Free RX buffers allocated so far, we will only handle
4351          * the rings that completed, the failing case will have
4352          * cleaned up for itself. 'i' is the endpoint.
4353          */
4354         for (int j = 0; j < i; ++j) {
4355                 rxr = &adapter->rx_rings[j];
4356                 IGB_RX_LOCK(rxr);
4357                 igb_free_receive_ring(rxr);
4358                 IGB_RX_UNLOCK(rxr);
4359         }
4360
4361         return (ENOBUFS);
4362 }
4363
4364 /*********************************************************************
4365  *
4366  *  Enable receive unit.
4367  *
4368  **********************************************************************/
4369 static void
4370 igb_initialize_receive_units(struct adapter *adapter)
4371 {
4372         struct rx_ring  *rxr = adapter->rx_rings;
4373         struct ifnet    *ifp = adapter->ifp;
4374         struct e1000_hw *hw = &adapter->hw;
4375         u32             rctl, rxcsum, psize, srrctl = 0;
4376
4377         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4378
4379         /*
4380          * Make sure receives are disabled while setting
4381          * up the descriptor ring
4382          */
4383         rctl = E1000_READ_REG(hw, E1000_RCTL);
4384         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4385
4386         /*
4387         ** Set up for header split
4388         */
4389         if (igb_header_split) {
4390                 /* Use a standard mbuf for the header */
4391                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4392                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4393         } else
4394                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4395
4396         /*
4397         ** Set up for jumbo frames
4398         */
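        /*
        ** SRRCTL.BSIZEPKT is programmed in 1KB units, hence the
        ** buffer sizes below are shifted right by
        ** E1000_SRRCTL_BSIZEPKT_SHIFT.
        */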
4399         if (ifp->if_mtu > ETHERMTU) {
4400                 rctl |= E1000_RCTL_LPE;
4401                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4402                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4403                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4404                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4405                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4406                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4407                 }
4408                 /* Set maximum packet len */
4409                 psize = adapter->max_frame_size;
4410                 /* are we on a vlan? */
4411                 if (adapter->ifp->if_vlantrunk != NULL)
4412                         psize += VLAN_TAG_SIZE;
4413                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4414         } else {
4415                 rctl &= ~E1000_RCTL_LPE;
4416                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4417                 rctl |= E1000_RCTL_SZ_2048;
4418         }
4419
4420         /* Setup the Base and Length of the Rx Descriptor Rings */
4421         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4422                 u64 bus_addr = rxr->rxdma.dma_paddr;
4423                 u32 rxdctl;
4424
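                /*
                ** Legacy and advanced RX descriptors are both 16
                ** bytes, so sizing RDLEN with the legacy struct
                ** is harmless here.
                */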
4425                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4426                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4427                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4428                     (uint32_t)(bus_addr >> 32));
4429                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4430                     (uint32_t)bus_addr);
4431                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4432                 /* Enable this Queue */
4433                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4434                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
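                /*
                ** Clear the low 20 bits and program the prefetch,
                ** host and write-back thresholds (bit offsets 0,
                ** 8 and 16 respectively).
                */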
4435                 rxdctl &= 0xFFF00000;
4436                 rxdctl |= IGB_RX_PTHRESH;
4437                 rxdctl |= IGB_RX_HTHRESH << 8;
4438                 rxdctl |= IGB_RX_WTHRESH << 16;
4439                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4440         }
4441
4442         /*
4443         ** Setup for RX MultiQueue
4444         */
4445         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4446         if (adapter->num_queues > 1) {
4447                 u32 random[10], mrqc, shift = 0;
4448                 union igb_reta {
4449                         u32 dword;
4450                         u8  bytes[4];
4451                 } reta;
4452
4453                 arc4rand(&random, sizeof(random), 0);
4454                 if (adapter->hw.mac.type == e1000_82575)
4455                         shift = 6;
4456                 /* Fill the 128-entry RSS redirection table (RETA) */
4457                 for (int i = 0; i < 128; i++) {
4458                         reta.bytes[i & 3] =
4459                             (i % adapter->num_queues) << shift;
4460                         if ((i & 3) == 3)
4461                                 E1000_WRITE_REG(hw,
4462                                     E1000_RETA(i >> 2), reta.dword);
4463                 }
4464                 /* Now fill in hash table */
4465                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4466                 for (int i = 0; i < 10; i++)
4467                         E1000_WRITE_REG_ARRAY(hw,
4468                             E1000_RSSRK(0), i, random[i]);
4469
4470                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4471                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4472                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4473                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4474                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4475                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4476                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4477                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4478
4479                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4480
4481                 /*
4482                 ** NOTE: Receive Full-Packet Checksum Offload
4483                 ** is mutually exclusive with Multiqueue; this
4484                 ** is not the same as the TCP/IP checksums,
4485                 ** which still work.
4486                 */
4487                 rxcsum |= E1000_RXCSUM_PCSD;
4488 #if __FreeBSD_version >= 800000
4489                 /* For SCTP Offload */
4490                 if ((hw->mac.type == e1000_82576)
4491                     && (ifp->if_capenable & IFCAP_RXCSUM))
4492                         rxcsum |= E1000_RXCSUM_CRCOFL;
4493 #endif
4494         } else {
4495                 /* Non RSS setup */
4496                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4497                         rxcsum |= E1000_RXCSUM_IPPCSE;
4498 #if __FreeBSD_version >= 800000
4499                         if (adapter->hw.mac.type == e1000_82576)
4500                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4501 #endif
4502                 } else
4503                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4504         }
4505         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4506
4507         /* Setup the Receive Control Register */
4508         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4509         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4510                    E1000_RCTL_RDMTS_HALF |
4511                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4512         /* Strip CRC bytes. */
4513         rctl |= E1000_RCTL_SECRC;
4514         /* Make sure VLAN Filters are off */
4515         rctl &= ~E1000_RCTL_VFE;
4516         /* Don't store bad packets */
4517         rctl &= ~E1000_RCTL_SBP;
4518
4519         /* Enable Receives */
4520         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4521
4522         /*
4523          * Setup the HW Rx Head and Tail Descriptor Pointers
4524          *   - needs to be after enable
4525          */
4526         for (int i = 0; i < adapter->num_queues; i++) {
4527                 rxr = &adapter->rx_rings[i];
4528                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4529 #ifdef DEV_NETMAP
4530                 /*
4531                  * an init() while a netmap client is active must
4532                  * preserve the rx buffers passed to userspace.
4533                  * In this driver it means we adjust RDT to
4534                  * something different from next_to_refresh
4535                  * (which is not used in netmap mode).
4536                  */
4537                 if (ifp->if_capenable & IFCAP_NETMAP) {
4538                         struct netmap_adapter *na = NA(adapter->ifp);
4539                         struct netmap_kring *kring = &na->rx_rings[i];
4540                         int t = rxr->next_to_refresh - kring->nr_hwavail;
4541
4542                         if (t >= adapter->num_rx_desc)
4543                                 t -= adapter->num_rx_desc;
4544                         else if (t < 0)
4545                                 t += adapter->num_rx_desc;
4546                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4547                 } else
4548 #endif /* DEV_NETMAP */
4549                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4550         }
4551         return;
4552 }
4553
4554 /*********************************************************************
4555  *
4556  *  Free receive rings.
4557  *
4558  **********************************************************************/
4559 static void
4560 igb_free_receive_structures(struct adapter *adapter)
4561 {
4562         struct rx_ring *rxr = adapter->rx_rings;
4563
4564         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4565                 struct lro_ctrl *lro = &rxr->lro;
4566                 igb_free_receive_buffers(rxr);
4567                 tcp_lro_free(lro);
4568                 igb_dma_free(adapter, &rxr->rxdma);
4569         }
4570
4571         free(adapter->rx_rings, M_DEVBUF);
4572 }
4573
4574 /*********************************************************************
4575  *
4576  *  Free receive ring data structures.
4577  *
4578  **********************************************************************/
4579 static void
4580 igb_free_receive_buffers(struct rx_ring *rxr)
4581 {
4582         struct adapter          *adapter = rxr->adapter;
4583         struct igb_rx_buf       *rxbuf;
4584         int i;
4585
4586         INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4587
4588         /* Cleanup any existing buffers */
4589         if (rxr->rx_buffers != NULL) {
4590                 for (i = 0; i < adapter->num_rx_desc; i++) {
4591                         rxbuf = &rxr->rx_buffers[i];
4592                         if (rxbuf->m_head != NULL) {
4593                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4594                                     BUS_DMASYNC_POSTREAD);
4595                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4596                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4597                                 m_freem(rxbuf->m_head);
4598                         }
4599                         if (rxbuf->m_pack != NULL) {
4600                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4601                                     BUS_DMASYNC_POSTREAD);
4602                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4603                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4604                                 m_freem(rxbuf->m_pack);
4605                         }
4606                         rxbuf->m_head = NULL;
4607                         rxbuf->m_pack = NULL;
4608                         if (rxbuf->hmap != NULL) {
4609                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4610                                 rxbuf->hmap = NULL;
4611                         }
4612                         if (rxbuf->pmap != NULL) {
4613                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4614                                 rxbuf->pmap = NULL;
4615                         }
4616                 }
4617                 if (rxr->rx_buffers != NULL) {
4618                         free(rxr->rx_buffers, M_DEVBUF);
4619                         rxr->rx_buffers = NULL;
4620                 }
4621         }
4622
4623         if (rxr->htag != NULL) {
4624                 bus_dma_tag_destroy(rxr->htag);
4625                 rxr->htag = NULL;
4626         }
4627         if (rxr->ptag != NULL) {
4628                 bus_dma_tag_destroy(rxr->ptag);
4629                 rxr->ptag = NULL;
4630         }
4631 }
4632
4633 static __inline void
4634 igb_rx_discard(struct rx_ring *rxr, int i)
4635 {
4636         struct igb_rx_buf       *rbuf;
4637
4638         rbuf = &rxr->rx_buffers[i];
4639
4640         /* Partially received? Free the chain */
4641         if (rxr->fmp != NULL) {
4642                 rxr->fmp->m_flags |= M_PKTHDR;
4643                 m_freem(rxr->fmp);
4644                 rxr->fmp = NULL;
4645                 rxr->lmp = NULL;
4646         }
4647
4648         /*
4649         ** With advanced descriptors the writeback
4650         ** clobbers the buffer addrs, so it's easier
4651         ** to just free the existing mbufs and take
4652         ** the normal refresh path to get new buffers
4653         ** and mapping.
4654         */
4655         if (rbuf->m_head) {
4656                 m_free(rbuf->m_head);
4657                 rbuf->m_head = NULL;
4658         }
4659
4660         if (rbuf->m_pack) {
4661                 m_free(rbuf->m_pack);
4662                 rbuf->m_pack = NULL;
4663         }
4664
4665         return;
4666 }
4667
4668 static __inline void
4669 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4670 {
4671
4672         /*
4673          * At the moment LRO is only for IPv4/TCP packets whose
4674          * TCP checksum has been verified by hardware, and which
4675          * carry no VLAN tag in the Ethernet header.
4676          */
4677         if (rxr->lro_enabled &&
4678             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4679             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4680             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4681             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4682             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4683             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4684                 /*
4685                  * Send to the stack if:
4686                  *  - LRO is not enabled, or
4687                  *  - there are no LRO resources, or
4688                  *  - the LRO enqueue fails
4689                  */
4690                 if (rxr->lro.lro_cnt != 0)
4691                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4692                                 return;
4693         }
4694         IGB_RX_UNLOCK(rxr);
4695         (*ifp->if_input)(ifp, m);
4696         IGB_RX_LOCK(rxr);
4697 }
4698
4699 /*********************************************************************
4700  *
4701  *  This routine executes in interrupt context. It replenishes
4702  *  the mbufs in the descriptor ring and sends data which has
4703  *  been dma'ed into host memory up to the upper layer.
4704  *
4705  *  We loop at most count times if count is > 0, or until done if
4706  *  count < 0.
4707  *
4708  *  Return TRUE if more to clean, FALSE otherwise
4709  *********************************************************************/
4710 static bool
4711 igb_rxeof(struct igb_queue *que, int count, int *done)
4712 {
4713         struct adapter          *adapter = que->adapter;
4714         struct rx_ring          *rxr = que->rxr;
4715         struct ifnet            *ifp = adapter->ifp;
4716         struct lro_ctrl         *lro = &rxr->lro;
4717         struct lro_entry        *queued;
4718         int                     i, processed = 0, rxdone = 0;
4719         u32                     ptype, staterr = 0;
4720         union e1000_adv_rx_desc *cur;
4721
4722         IGB_RX_LOCK(rxr);
4723         /* Sync the ring. */
4724         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4725             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4726
4727 #ifdef DEV_NETMAP
4728         if (ifp->if_capenable & IFCAP_NETMAP) {
4729                 struct netmap_adapter *na = NA(ifp);
4730
4731                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4732                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4733                 IGB_RX_UNLOCK(rxr);
4734                 IGB_CORE_LOCK(adapter);
4735                 selwakeuppri(&na->rx_si, PI_NET);
4736                 IGB_CORE_UNLOCK(adapter);
4737                 return (0);
4738         }
4739 #endif /* DEV_NETMAP */
4740
4741         /* Main clean loop */
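        /*
        ** Walk the ring from next_to_check until we hit a
        ** descriptor the hardware has not written back (DD
        ** clear) or until the 'count' budget is exhausted.
        */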
4742         for (i = rxr->next_to_check; count != 0;) {
4743                 struct mbuf             *sendmp, *mh, *mp;
4744                 struct igb_rx_buf       *rxbuf;
4745                 u16                     hlen, plen, hdr, vtag;
4746                 bool                    eop = FALSE;
4747  
4748                 cur = &rxr->rx_base[i];
4749                 staterr = le32toh(cur->wb.upper.status_error);
4750                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4751                         break;
4752                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4753                         break;
4754                 count--;
4755                 sendmp = mh = mp = NULL;
4756                 cur->wb.upper.status_error = 0;
4757                 rxbuf = &rxr->rx_buffers[i];
4758                 plen = le16toh(cur->wb.upper.length);
4759                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4760                 if ((adapter->hw.mac.type == e1000_i350) &&
4761                     (staterr & E1000_RXDEXT_STATERR_LB))
4762                         vtag = be16toh(cur->wb.upper.vlan);
4763                 else
4764                         vtag = le16toh(cur->wb.upper.vlan);
4765                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4766                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4767
4768                 /* Make sure all segments of a bad packet are discarded */
4769                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4770                     (rxr->discard)) {
4771                         adapter->dropped_pkts++;
4772                         ++rxr->rx_discarded;
4773                         if (!eop) /* Catch subsequent segs */
4774                                 rxr->discard = TRUE;
4775                         else
4776                                 rxr->discard = FALSE;
4777                         igb_rx_discard(rxr, i);
4778                         goto next_desc;
4779                 }
4780
4781                 /*
4782                 ** The way the hardware is configured to
4783                 ** split, it will ONLY use the header buffer
4784                 ** when header split is enabled, otherwise we
4785                 ** get normal behavior, ie, both header and
4786                 ** payload are DMA'd into the payload buffer.
4787                 **
4788                 ** The fmp test is to catch the case where a
4789                 ** packet spans multiple descriptors, in that
4790                 ** case only the first header is valid.
4791                 */
4792                 if (rxr->hdr_split && rxr->fmp == NULL) {
4793                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4794                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4795                         if (hlen > IGB_HDR_BUF)
4796                                 hlen = IGB_HDR_BUF;
4797                         mh = rxr->rx_buffers[i].m_head;
4798                         mh->m_len = hlen;
4799                         /* clear buf pointer for refresh */
4800                         rxbuf->m_head = NULL;
4801                         /*
4802                         ** Get the payload length, this
4803                         ** could be zero if its a small
4804                         ** packet.
4805                         */
4806                         if (plen > 0) {
4807                                 mp = rxr->rx_buffers[i].m_pack;
4808                                 mp->m_len = plen;
4809                                 mh->m_next = mp;
4810                                 /* clear buf pointer */
4811                                 rxbuf->m_pack = NULL;
4812                                 rxr->rx_split_packets++;
4813                         }
4814                 } else {
4815                         /*
4816                         ** Either no header split, or a
4817                         ** secondary piece of a fragmented
4818                         ** split packet.
4819                         */
4820                         mh = rxr->rx_buffers[i].m_pack;
4821                         mh->m_len = plen;
4822                         /* clear buf info for refresh */
4823                         rxbuf->m_pack = NULL;
4824                 }
4825
4826                 ++processed; /* So we know when to refresh */
4827
4828                 /* Initial frame - setup */
4829                 if (rxr->fmp == NULL) {
4830                         mh->m_pkthdr.len = mh->m_len;
4831                         /* Save the head of the chain */
4832                         rxr->fmp = mh;
4833                         rxr->lmp = mh;
4834                         if (mp != NULL) {
4835                                 /* Add payload if split */
4836                                 mh->m_pkthdr.len += mp->m_len;
4837                                 rxr->lmp = mh->m_next;
4838                         }
4839                 } else {
4840                         /* Chain mbuf's together */
4841                         rxr->lmp->m_next = mh;
4842                         rxr->lmp = rxr->lmp->m_next;
4843                         rxr->fmp->m_pkthdr.len += mh->m_len;
4844                 }
4845
4846                 if (eop) {
4847                         rxr->fmp->m_pkthdr.rcvif = ifp;
4848                         ifp->if_ipackets++;
4849                         rxr->rx_packets++;
4850                         /* capture data for AIM */
4851                         rxr->packets++;
4852                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4853                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4854
4855                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4856                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4857
4858                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4859                             (staterr & E1000_RXD_STAT_VP) != 0) {
4860                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4861                                 rxr->fmp->m_flags |= M_VLANTAG;
4862                         }
4863 #if __FreeBSD_version >= 800000
4864                         rxr->fmp->m_pkthdr.flowid = que->msix;
4865                         rxr->fmp->m_flags |= M_FLOWID;
4866 #endif
4867                         sendmp = rxr->fmp;
4868                         /* Make sure to set M_PKTHDR. */
4869                         sendmp->m_flags |= M_PKTHDR;
4870                         rxr->fmp = NULL;
4871                         rxr->lmp = NULL;
4872                 }
4873
4874 next_desc:
4875                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4876                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4877
4878                 /* Advance our pointers to the next descriptor. */
4879                 if (++i == adapter->num_rx_desc)
4880                         i = 0;
4881                 /*
4882                 ** Send to the stack or LRO
4883                 */
4884                 if (sendmp != NULL) {
4885                         rxr->next_to_check = i;
4886                         igb_rx_input(rxr, ifp, sendmp, ptype);
4887                         i = rxr->next_to_check;
4888                         rxdone++;
4889                 }
4890
4891                 /* Every 8 descriptors we go to refresh mbufs */
4892                 if (processed == 8) {
4893                         igb_refresh_mbufs(rxr, i);
4894                         processed = 0;
4895                 }
4896         }
4897
4898         /* Catch any remainders */
4899         if (igb_rx_unrefreshed(rxr))
4900                 igb_refresh_mbufs(rxr, i);
4901
4902         rxr->next_to_check = i;
4903
4904         /*
4905          * Flush any outstanding LRO work
4906          */
4907         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4908                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4909                 tcp_lro_flush(lro, queued);
4910         }
4911
4912         if (done != NULL)
4913                 *done += rxdone;
4914
4915         IGB_RX_UNLOCK(rxr);
4916         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4917 }
4918
4919 /*********************************************************************
4920  *
4921  *  Verify that the hardware indicated that the checksum is valid.
4922  *  Inform the stack about the status of checksum so that stack
4923  *  doesn't spend time verifying the checksum.
4924  *
4925  *********************************************************************/
4926 static void
4927 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4928 {
4929         u16 status = (u16)staterr;
4930         u8  errors = (u8) (staterr >> 24);
4931         int sctp;
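        /*
        ** The low 16 bits of staterr carry the status flags and
        ** bits 31:24 carry the error flags; they are split
        ** apart above.
        */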
4932
4933         /* The Ignore Checksum bit is set; report nothing */
4934         if (status & E1000_RXD_STAT_IXSM) {
4935                 mp->m_pkthdr.csum_flags = 0;
4936                 return;
4937         }
4938
4939         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4940             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4941                 sctp = 1;
4942         else
4943                 sctp = 0;
4944         if (status & E1000_RXD_STAT_IPCS) {
4945                 /* Did it pass? */
4946                 if (!(errors & E1000_RXD_ERR_IPE)) {
4947                         /* IP Checksum Good */
4948                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4949                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4950                 } else
4951                         mp->m_pkthdr.csum_flags = 0;
4952         }
4953
4954         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4955                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4956 #if __FreeBSD_version >= 800000
4957                 if (sctp) /* reassign */
4958                         type = CSUM_SCTP_VALID;
4959 #endif
4960                 /* Did it pass? */
4961                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4962                         mp->m_pkthdr.csum_flags |= type;
4963                         if (sctp == 0)
4964                                 mp->m_pkthdr.csum_data = htons(0xffff);
4965                 }
4966         }
4967         return;
4968 }
4969
4970 /*
4971  * This routine is run via a vlan
4972  * config EVENT
4973  */
4974 static void
4975 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4976 {
4977         struct adapter  *adapter = ifp->if_softc;
4978         u32             index, bit;
4979
4980         if (ifp->if_softc !=  arg)   /* Not our event */
4981                 return;
4982
4983         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4984                 return;
4985
4986         IGB_CORE_LOCK(adapter);
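        /*
        ** The VFTA is 128 32-bit words; each VLAN id maps to
        ** bit (vtag & 0x1F) of word (vtag >> 5).
        */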
4987         index = (vtag >> 5) & 0x7F;
4988         bit = vtag & 0x1F;
4989         adapter->shadow_vfta[index] |= (1 << bit);
4990         ++adapter->num_vlans;
4991         /* Change hw filter setting */
4992         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4993                 igb_setup_vlan_hw_support(adapter);
4994         IGB_CORE_UNLOCK(adapter);
4995 }
4996
4997 /*
4998  * This routine is run via a vlan
4999  * unconfig EVENT
5000  */
5001 static void
5002 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5003 {
5004         struct adapter  *adapter = ifp->if_softc;
5005         u32             index, bit;
5006
5007         if (ifp->if_softc !=  arg)
5008                 return;
5009
5010         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5011                 return;
5012
5013         IGB_CORE_LOCK(adapter);
5014         index = (vtag >> 5) & 0x7F;
5015         bit = vtag & 0x1F;
5016         adapter->shadow_vfta[index] &= ~(1 << bit);
5017         --adapter->num_vlans;
5018         /* Change hw filter setting */
5019         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5020                 igb_setup_vlan_hw_support(adapter);
5021         IGB_CORE_UNLOCK(adapter);
5022 }
5023
5024 static void
5025 igb_setup_vlan_hw_support(struct adapter *adapter)
5026 {
5027         struct e1000_hw *hw = &adapter->hw;
5028         struct ifnet    *ifp = adapter->ifp;
5029         u32             reg;
5030
5031         if (adapter->vf_ifp) {
5032                 e1000_rlpml_set_vf(hw,
5033                     adapter->max_frame_size + VLAN_TAG_SIZE);
5034                 return;
5035         }
5036
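        /* Enable VLAN tag handling (VME) in the control register */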
5037         reg = E1000_READ_REG(hw, E1000_CTRL);
5038         reg |= E1000_CTRL_VME;
5039         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5040
5041         /* Enable the Filter Table */
5042         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5043                 reg = E1000_READ_REG(hw, E1000_RCTL);
5044                 reg &= ~E1000_RCTL_CFIEN;
5045                 reg |= E1000_RCTL_VFE;
5046                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5047         }
5048
5049         /* Update the frame size */
5050         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5051             adapter->max_frame_size + VLAN_TAG_SIZE);
5052
5053         /* Don't bother with table if no vlans */
5054         if ((adapter->num_vlans == 0) ||
5055             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5056                 return;
5057         /*
5058         ** A soft reset zeroes out the VFTA, so
5059         ** we need to repopulate it now.
5060         */
5061         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5062                 if (adapter->shadow_vfta[i] != 0) {
5063                         if (adapter->vf_ifp)
5064                                 e1000_vfta_set_vf(hw,
5065                                     adapter->shadow_vfta[i], TRUE);
5066                         else
5067                                 e1000_write_vfta(hw,
5068                                     i, adapter->shadow_vfta[i]);
5069                 }
5070 }
5071
5072 static void
5073 igb_enable_intr(struct adapter *adapter)
5074 {
5075         /* With MSIX/RSS, set up which interrupt causes auto clear */
5076         if (adapter->msix_mem) {
5077                 u32 mask = (adapter->que_mask | adapter->link_mask);
5078                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5079                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5080                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5081                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5082                     E1000_IMS_LSC);
5083         } else {
5084                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5085                     IMS_ENABLE_MASK);
5086         }
5087         E1000_WRITE_FLUSH(&adapter->hw);
5088
5089         return;
5090 }
5091
5092 static void
5093 igb_disable_intr(struct adapter *adapter)
5094 {
5095         if (adapter->msix_mem) {
5096                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5097                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5098         } 
5099         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5100         E1000_WRITE_FLUSH(&adapter->hw);
5101         return;
5102 }
5103
5104 /*
5105  * Bit of a misnomer: what this really means is
5106  * to enable OS management of the system, i.e.
5107  * to disable special hardware management features.
5108  */
5109 static void
5110 igb_init_manageability(struct adapter *adapter)
5111 {
5112         if (adapter->has_manage) {
5113                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5114                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5115
5116                 /* disable hardware interception of ARP */
5117                 manc &= ~(E1000_MANC_ARP_EN);
5118
5119                 /* enable receiving management packets to the host */
5120                 manc |= E1000_MANC_EN_MNG2HOST;
5121                 manc2h |= 1 << 5;  /* Mng Port 623 */
5122                 manc2h |= 1 << 6;  /* Mng Port 664 */
5123                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5124                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5125         }
5126 }
5127
5128 /*
5129  * Give control back to hardware management
5130  * controller if there is one.
5131  */
5132 static void
5133 igb_release_manageability(struct adapter *adapter)
5134 {
5135         if (adapter->has_manage) {
5136                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5137
5138                 /* re-enable hardware interception of ARP */
5139                 manc |= E1000_MANC_ARP_EN;
5140                 manc &= ~E1000_MANC_EN_MNG2HOST;
5141
5142                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5143         }
5144 }
5145
5146 /*
5147  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5148  * For ASF and Pass Through versions of f/w this means that
5149  * the driver is loaded. 
5150  *
5151  */
5152 static void
5153 igb_get_hw_control(struct adapter *adapter)
5154 {
5155         u32 ctrl_ext;
5156
5157         if (adapter->vf_ifp)
5158                 return;
5159
5160         /* Let firmware know the driver has taken over */
5161         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5162         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5163             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5164 }
5165
5166 /*
5167  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5168  * For ASF and Pass Through versions of f/w this means that the
5169  * driver is no longer loaded.
5170  *
5171  */
5172 static void
5173 igb_release_hw_control(struct adapter *adapter)
5174 {
5175         u32 ctrl_ext;
5176
5177         if (adapter->vf_ifp)
5178                 return;
5179
5180         /* Let firmware take over control of h/w */
5181         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5182         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5183             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5184 }
5185
5186 static int
5187 igb_is_valid_ether_addr(uint8_t *addr)
5188 {
5189         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5190
5191         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5192                 return (FALSE);
5193         }
5194
5195         return (TRUE);
5196 }
5197
5198
5199 /*
5200  * Enable PCI Wake On Lan capability
5201  */
5202 static void
5203 igb_enable_wakeup(device_t dev)
5204 {
5205         u16     cap, status;
5206         u8      id;
5207
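        /*
        ** Locate the PCI power-management capability; only the
        ** first entry in the capability list is checked, and we
        ** bail if it is not the PM capability.
        */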
5208         /* First find the capabilities pointer */
5209         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5210         /* Read the PM Capabilities */
5211         id = pci_read_config(dev, cap, 1);
5212         if (id != PCIY_PMG)     /* Something wrong */
5213                 return;
5214         /* OK, we have the power capabilities, so
5215            now get the status register */
5216         cap += PCIR_POWER_STATUS;
5217         status = pci_read_config(dev, cap, 2);
5218         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5219         pci_write_config(dev, cap, status, 2);
5220         return;
5221 }
5222
5223 static void
5224 igb_led_func(void *arg, int onoff)
5225 {
5226         struct adapter  *adapter = arg;
5227
5228         IGB_CORE_LOCK(adapter);
5229         if (onoff) {
5230                 e1000_setup_led(&adapter->hw);
5231                 e1000_led_on(&adapter->hw);
5232         } else {
5233                 e1000_led_off(&adapter->hw);
5234                 e1000_cleanup_led(&adapter->hw);
5235         }
5236         IGB_CORE_UNLOCK(adapter);
5237 }
5238
5239 /**********************************************************************
5240  *
5241  *  Update the board statistics counters.
5242  *
5243  **********************************************************************/
5244 static void
5245 igb_update_stats_counters(struct adapter *adapter)
5246 {
5247         struct ifnet            *ifp;
5248         struct e1000_hw         *hw = &adapter->hw;
5249         struct e1000_hw_stats   *stats;
5250
5251         /* 
5252         ** The virtual function adapter has only a
5253         ** small controlled set of stats, do only 
5254         ** those and return.
5255         */
5256         if (adapter->vf_ifp) {
5257                 igb_update_vf_stats_counters(adapter);
5258                 return;
5259         }
5260
5261         stats = (struct e1000_hw_stats  *)adapter->stats;
5262
5263         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5264            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5265                 stats->symerrs +=
5266                     E1000_READ_REG(hw, E1000_SYMERRS);
5267                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5268         }
5269
5270         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5271         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5272         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5273         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5274
5275         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5276         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5277         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5278         stats->dc += E1000_READ_REG(hw, E1000_DC);
5279         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5280         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5281         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5282         /*
5283         ** For watchdog management we need to know if we have been
5284         ** paused during the last interval, so capture that here.
5285         */ 
5286         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5287         stats->xoffrxc += adapter->pause_frames;
5288         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5289         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5290         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5291         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5292         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5293         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5294         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5295         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5296         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5297         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5298         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5299         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5300
5301         /* For the 64-bit byte counters the low dword must be read first. */
5302         /* Both registers clear on the read of the high dword */
5303
5304         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5305             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5306         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5307             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5308
5309         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5310         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5311         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5312         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5313         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5314
5315         stats->tor += E1000_READ_REG(hw, E1000_TORH);
5316         stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5317
5318         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5319         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5320         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5321         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5322         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5323         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5324         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5325         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5326         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5327         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5328
5329         /* Interrupt Counts */
5330
5331         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5332         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5333         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5334         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5335         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5336         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5337         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5338         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5339         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5340
5341         /* Host to Card Statistics */
5342
5343         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5344         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5345         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5346         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5347         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5348         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5349         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5350         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5351             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5352         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5353             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5354         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5355         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5356         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5357
5358         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5359         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5360         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5361         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5362         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5363         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5364
5365         ifp = adapter->ifp;
5366         ifp->if_collisions = stats->colc;
5367
5368         /* Rx Errors */
5369         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5370             stats->crcerrs + stats->algnerrc +
5371             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5372
5373         /* Tx Errors */
5374         ifp->if_oerrors = stats->ecol +
5375             stats->latecol + adapter->watchdog_events;
5376
5377         /* Driver specific counters */
5378         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5379         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5380         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5381         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5382         adapter->packet_buf_alloc_tx =
5383             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5384         adapter->packet_buf_alloc_rx =
5385             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5386 }
5387
5388
5389 /**********************************************************************
5390  *
5391  *  Initialize the VF board statistics counters.
5392  *
5393  **********************************************************************/
5394 static void
5395 igb_vf_init_stats(struct adapter *adapter)
5396 {
5397         struct e1000_hw *hw = &adapter->hw;
5398         struct e1000_vf_stats   *stats;
5399
5400         stats = (struct e1000_vf_stats  *)adapter->stats;
5401         if (stats == NULL)
5402                 return;
5403         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5404         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5405         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5406         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5407         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5408 }
5409  
5410 /**********************************************************************
5411  *
5412  *  Update the VF board statistics counters.
5413  *
5414  **********************************************************************/
5415 static void
5416 igb_update_vf_stats_counters(struct adapter *adapter)
5417 {
5418         struct e1000_hw *hw = &adapter->hw;
5419         struct e1000_vf_stats   *stats;
5420
5421         if (adapter->link_speed == 0)
5422                 return;
5423
5424         stats = (struct e1000_vf_stats  *)adapter->stats;
5425
5426         UPDATE_VF_REG(E1000_VFGPRC,
5427             stats->last_gprc, stats->gprc);
5428         UPDATE_VF_REG(E1000_VFGORC,
5429             stats->last_gorc, stats->gorc);
5430         UPDATE_VF_REG(E1000_VFGPTC,
5431             stats->last_gptc, stats->gptc);
5432         UPDATE_VF_REG(E1000_VFGOTC,
5433             stats->last_gotc, stats->gotc);
5434         UPDATE_VF_REG(E1000_VFMPRC,
5435             stats->last_mprc, stats->mprc);
5436 }
5437
5438 /* Export a single 32-bit register via a read-only sysctl. */
5439 static int
5440 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5441 {
5442         struct adapter *adapter;
5443         u_int val;
5444
5445         adapter = oidp->oid_arg1;
5446         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5447         return (sysctl_handle_int(oidp, &val, 0, req));
5448 }
5449
5450 /*
5451 **  Tunable interrupt rate handler
5452 */
5453 static int
5454 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5455 {
5456         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5457         int                     error;
5458         u32                     reg, usec, rate;
5459                         
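        /*
        ** EITR keeps the interrupt interval in bits 14:2; the
        ** code treats it as microseconds and reports the rate
        ** as 1000000/usec interrupts per second.
        */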
5460         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5461         usec = ((reg & 0x7FFC) >> 2);
5462         if (usec > 0)
5463                 rate = 1000000 / usec;
5464         else
5465                 rate = 0;
5466         error = sysctl_handle_int(oidp, &rate, 0, req);
5467         if (error || !req->newptr)
5468                 return (error);
5469         return (0);
5470 }
5471
5472 /*
5473  * Add sysctl variables, one per statistic, to the system.
5474  */
5475 static void
5476 igb_add_hw_stats(struct adapter *adapter)
5477 {
5478         device_t dev = adapter->dev;
5479
5480         struct tx_ring *txr = adapter->tx_rings;
5481         struct rx_ring *rxr = adapter->rx_rings;
5482
5483         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5484         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5485         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5486         struct e1000_hw_stats *stats = adapter->stats;
5487
5488         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5489         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5490
5491 #define QUEUE_NAME_LEN 32
5492         char namebuf[QUEUE_NAME_LEN];
5493
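        /*
        ** Everything below lands under the device's sysctl
        ** tree, e.g. (assuming unit 0) dev.igb.0.dropped or
        ** dev.igb.0.queue0.interrupt_rate.
        */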
5494         /* Driver Statistics */
5495         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5496                         CTLFLAG_RD, &adapter->link_irq, 0,
5497                         "Link MSIX IRQ Handled");
5498         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5499                         CTLFLAG_RD, &adapter->dropped_pkts,
5500                         "Driver dropped packets");
5501         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5502                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5503                         "Driver tx dma failure in xmit");
5504         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5505                         CTLFLAG_RD, &adapter->rx_overruns,
5506                         "RX overruns");
5507         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5508                         CTLFLAG_RD, &adapter->watchdog_events,
5509                         "Watchdog timeouts");
5510
5511         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5512                         CTLFLAG_RD, &adapter->device_control,
5513                         "Device Control Register");
5514         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5515                         CTLFLAG_RD, &adapter->rx_control,
5516                         "Receiver Control Register");
5517         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5518                         CTLFLAG_RD, &adapter->int_mask,
5519                         "Interrupt Mask");
5520         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5521                         CTLFLAG_RD, &adapter->eint_mask,
5522                         "Extended Interrupt Mask");
5523         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5524                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5525                         "Transmit Buffer Packet Allocation");
5526         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5527                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5528                         "Receive Buffer Packet Allocation");
5529         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5530                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5531                         "Flow Control High Watermark");
5532         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5533                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5534                         "Flow Control Low Watermark");
5535
5536         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5537                 struct lro_ctrl *lro = &rxr->lro;
5538
5539                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5540                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5541                                             CTLFLAG_RD, NULL, "Queue Statistics");
5542                 queue_list = SYSCTL_CHILDREN(queue_node);
5543
5544                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5545                                 CTLFLAG_RD, &adapter->queues[i],
5546                                 sizeof(adapter->queues[i]),
5547                                 igb_sysctl_interrupt_rate_handler,
5548                                 "IU", "Interrupt Rate");
5549
5550                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5551                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5552                                 igb_sysctl_reg_handler, "IU",
5553                                 "Transmit Descriptor Head");
5554                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5555                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5556                                 igb_sysctl_reg_handler, "IU",
5557                                 "Transmit Descriptor Tail");
5558                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5559                                 CTLFLAG_RD, &txr->no_desc_avail,
5560                                 "Queue No Descriptor Available");
5561                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5562                                 CTLFLAG_RD, &txr->tx_packets,
5563                                 "Queue Packets Transmitted");
5564
5565                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5566                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5567                                 igb_sysctl_reg_handler, "IU",
5568                                 "Receive Descriptor Head");
5569                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5570                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5571                                 igb_sysctl_reg_handler, "IU",
5572                                 "Receive Descriptor Tail");
5573                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5574                                 CTLFLAG_RD, &rxr->rx_packets,
5575                                 "Queue Packets Received");
5576                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5577                                 CTLFLAG_RD, &rxr->rx_bytes,
5578                                 "Queue Bytes Received");
5579                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5580                                 CTLFLAG_RD, &lro->lro_queued, 0,
5581                                 "LRO Queued");
5582                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5583                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5584                                 "LRO Flushed");
5585         }
5586
5587         /* MAC stats get their own sub node */
5588
5589         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5590                                     CTLFLAG_RD, NULL, "MAC Statistics");
5591         stat_list = SYSCTL_CHILDREN(stat_node);
5592
5593         /*
5594         ** The VF adapter has a very limited set of stats
5595         ** since it's not managing the metal, so to speak.
5596         */
5597         if (adapter->vf_ifp) {
5598                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5599                                 CTLFLAG_RD, &stats->gprc,
5600                                 "Good Packets Received");
5601                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5602                                 CTLFLAG_RD, &stats->gptc,
5603                                 "Good Packets Transmitted");
5604                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5605                                 CTLFLAG_RD, &stats->gorc,
5606                                 "Good Octets Received");
5607                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5608                                 CTLFLAG_RD, &stats->gotc,
5609                                 "Good Octets Transmitted");
5610                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5611                                 CTLFLAG_RD, &stats->mprc,
5612                                 "Multicast Packets Received");
5613                 return;
5614         }
5615
5616         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5617                         CTLFLAG_RD, &stats->ecol,
5618                         "Excessive collisions");
5619         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5620                         CTLFLAG_RD, &stats->scc,
5621                         "Single collisions");
5622         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5623                         CTLFLAG_RD, &stats->mcc,
5624                         "Multiple collisions");
5625         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5626                         CTLFLAG_RD, &stats->latecol,
5627                         "Late collisions");
5628         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5629                         CTLFLAG_RD, &stats->colc,
5630                         "Collision Count");
5631         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5632                         CTLFLAG_RD, &stats->symerrs,
5633                         "Symbol Errors");
5634         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5635                         CTLFLAG_RD, &stats->sec,
5636                         "Sequence Errors");
5637         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5638                         CTLFLAG_RD, &stats->dc,
5639                         "Defer Count");
5640         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5641                         CTLFLAG_RD, &stats->mpc,
5642                         "Missed Packets");
5643         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5644                         CTLFLAG_RD, &stats->rnbc,
5645                         "Receive No Buffers");
5646         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5647                         CTLFLAG_RD, &stats->ruc,
5648                         "Receive Undersize");
5649         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5650                         CTLFLAG_RD, &stats->rfc,
5651                         "Fragmented Packets Received");
5652         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5653                         CTLFLAG_RD, &stats->roc,
5654                         "Oversized Packets Received");
5655         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5656                         CTLFLAG_RD, &stats->rjc,
5657                         "Received Jabber");
5658         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5659                         CTLFLAG_RD, &stats->rxerrc,
5660                         "Receive Errors");
5661         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5662                         CTLFLAG_RD, &stats->crcerrs,
5663                         "CRC errors");
5664         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5665                         CTLFLAG_RD, &stats->algnerrc,
5666                         "Alignment Errors");
5667         /* On 82575 these are collision counts */
5668         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5669                         CTLFLAG_RD, &stats->cexterr,
5670                         "Collision/Carrier extension errors");
5671         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5672                         CTLFLAG_RD, &stats->xonrxc,
5673                         "XON Received");
5674         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5675                         CTLFLAG_RD, &stats->xontxc,
5676                         "XON Transmitted");
5677         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5678                         CTLFLAG_RD, &stats->xoffrxc,
5679                         "XOFF Received");
5680         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5681                         CTLFLAG_RD, &stats->xofftxc,
5682                         "XOFF Transmitted");
5683         /* Packet Reception Stats */
5684         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5685                         CTLFLAG_RD, &stats->tpr,
5686                         "Total Packets Received");
5687         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5688                         CTLFLAG_RD, &stats->gprc,
5689                         "Good Packets Received");
5690         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5691                         CTLFLAG_RD, &stats->bprc,
5692                         "Broadcast Packets Received");
5693         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5694                         CTLFLAG_RD, &stats->mprc,
5695                         "Multicast Packets Received");
5696         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5697                         CTLFLAG_RD, &stats->prc64,
5698                         "64 byte frames received");
5699         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5700                         CTLFLAG_RD, &stats->prc127,
5701                         "65-127 byte frames received");
5702         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5703                         CTLFLAG_RD, &stats->prc255,
5704                         "128-255 byte frames received");
5705         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5706                         CTLFLAG_RD, &stats->prc511,
5707                         "256-511 byte frames received");
5708         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5709                         CTLFLAG_RD, &stats->prc1023,
5710                         "512-1023 byte frames received");
5711         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5712                         CTLFLAG_RD, &stats->prc1522,
5713                         "1024-1522 byte frames received");
5714         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5715                         CTLFLAG_RD, &stats->gorc, 
5716                         "Good Octets Received"); 
5717
5718         /* Packet Transmission Stats */
5719         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5720                         CTLFLAG_RD, &stats->gotc, 
5721                         "Good Octets Transmitted"); 
5722         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5723                         CTLFLAG_RD, &stats->tpt,
5724                         "Total Packets Transmitted");
5725         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5726                         CTLFLAG_RD, &stats->gptc,
5727                         "Good Packets Transmitted");
5728         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5729                         CTLFLAG_RD, &stats->bptc,
5730                         "Broadcast Packets Transmitted");
5731         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5732                         CTLFLAG_RD, &stats->mptc,
5733                         "Multicast Packets Transmitted");
5734         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5735                         CTLFLAG_RD, &stats->ptc64,
5736                         "64 byte frames transmitted");
5737         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5738                         CTLFLAG_RD, &stats->ptc127,
5739                         "65-127 byte frames transmitted");
5740         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5741                         CTLFLAG_RD, &stats->ptc255,
5742                         "128-255 byte frames transmitted");
5743         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5744                         CTLFLAG_RD, &stats->ptc511,
5745                         "256-511 byte frames transmitted");
5746         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5747                         CTLFLAG_RD, &stats->ptc1023,
5748                         "512-1023 byte frames transmitted");
5749         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5750                         CTLFLAG_RD, &stats->ptc1522,
5751                         "1024-1522 byte frames transmitted");
5752         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5753                         CTLFLAG_RD, &stats->tsctc,
5754                         "TSO Contexts Transmitted");
5755         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5756                         CTLFLAG_RD, &stats->tsctfc,
5757                         "TSO Contexts Failed");
5758
5759
5760         /* Interrupt Stats */
5761
5762         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5763                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5764         int_list = SYSCTL_CHILDREN(int_node);
5765
5766         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5767                         CTLFLAG_RD, &stats->iac,
5768                         "Interrupt Assertion Count");
5769
5770         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5771                         CTLFLAG_RD, &stats->icrxptc,
5772                         "Interrupt Cause Rx Pkt Timer Expire Count");
5773
5774         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5775                         CTLFLAG_RD, &stats->icrxatc,
5776                         "Interrupt Cause Rx Abs Timer Expire Count");
5777
5778         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5779                         CTLFLAG_RD, &stats->ictxptc,
5780                         "Interrupt Cause Tx Pkt Timer Expire Count");
5781
5782         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5783                         CTLFLAG_RD, &stats->ictxatc,
5784                         "Interrupt Cause Tx Abs Timer Expire Count");
5785
5786         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5787                         CTLFLAG_RD, &stats->ictxqec,
5788                         "Interrupt Cause Tx Queue Empty Count");
5789
5790         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5791                         CTLFLAG_RD, &stats->ictxqmtc,
5792                         "Interrupt Cause Tx Queue Min Thresh Count");
5793
5794         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5795                         CTLFLAG_RD, &stats->icrxdmtc,
5796                         "Interrupt Cause Rx Desc Min Thresh Count");
5797
5798         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5799                         CTLFLAG_RD, &stats->icrxoc,
5800                         "Interrupt Cause Receiver Overrun Count");
5801
5802         /* Host to Card Stats */
5803
5804         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5805                                     CTLFLAG_RD, NULL, 
5806                                     "Host to Card Statistics");
5807
5808         host_list = SYSCTL_CHILDREN(host_node);
5809
5810         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5811                         CTLFLAG_RD, &stats->cbtmpc,
5812                         "Circuit Breaker Tx Packet Count");
5813
5814         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5815                         CTLFLAG_RD, &stats->htdpmc,
5816                         "Host Transmit Discarded Packets");
5817
5818         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5819                         CTLFLAG_RD, &stats->rpthc,
5820                         "Rx Packets To Host");
5821
5822         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5823                         CTLFLAG_RD, &stats->cbrmpc,
5824                         "Circuit Breaker Rx Packet Count");
5825
5826         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5827                         CTLFLAG_RD, &stats->cbrdpc,
5828                         "Circuit Breaker Rx Dropped Count");
5829
5830         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5831                         CTLFLAG_RD, &stats->hgptc,
5832                         "Host Good Packets Tx Count");
5833
5834         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5835                         CTLFLAG_RD, &stats->htcbdpc,
5836                         "Host Tx Circuit Breaker Dropped Count");
5837
5838         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5839                         CTLFLAG_RD, &stats->hgorc,
5840                         "Host Good Octets Received Count");
5841
5842         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5843                         CTLFLAG_RD, &stats->hgotc,
5844                         "Host Good Octets Transmit Count");
5845
5846         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5847                         CTLFLAG_RD, &stats->lenerrs,
5848                         "Length Errors");
5849
5850         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5851                         CTLFLAG_RD, &stats->scvpc,
5852                         "SerDes/SGMII Code Violation Pkt Count");
5853
5854         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5855                         CTLFLAG_RD, &stats->hrmpc,
5856                         "Header Redirection Missed Packet Count");
5857 }
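
/*
** The resulting sysctl tree can be browsed from userland, e.g.
** (device unit 0; node and leaf names as registered above):
**
**      # sysctl dev.igb.0.mac_stats
**      # sysctl dev.igb.0.interrupts.asserts
**      # sysctl dev.igb.0.host.rx_pkt
*/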
5858
5859
5860 /**********************************************************************
5861  *
5862  *  This routine provides a way to dump out the adapter eeprom,
5863  *  often a useful debug/service tool. It dumps only the first
5864  *  32 words, since everything of interest lies in that range.
5865  *
5866  **********************************************************************/
5867 static int
5868 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5869 {
5870         struct adapter *adapter;
5871         int error;
5872         int result;
5873
5874         result = -1;
5875         error = sysctl_handle_int(oidp, &result, 0, req);
5876
5877         if (error || !req->newptr)
5878                 return (error);
5879
5880         /*
5881          * This value will cause a hex dump of the
5882          * first 32 16-bit words of the EEPROM to
5883          * the screen.
5884          */
5885         if (result == 1) {
5886                 adapter = (struct adapter *)arg1;
5887                 igb_print_nvm_info(adapter);
5888         }
5889
5890         return (error);
5891 }
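
/*
** Usage sketch: writing 1 to the OID this handler is attached to (the
** name is assumed here to be "nvm", as registered at attach time)
** triggers the dump:
**
**      # sysctl dev.igb.0.nvm=1
*/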
5892
5893 static void
5894 igb_print_nvm_info(struct adapter *adapter)
5895 {
5896         u16     eeprom_data;
5897         int     i, j, row = 0;
5898
5899         /* It's a bit crude, but it gets the job done */
5900         printf("\nInterface EEPROM Dump:\n");
5901         printf("Offset\n0x0000  ");
5902         for (i = 0, j = 0; i < 32; i++, j++) {
5903                 if (j == 8) { /* Make the offset block */
5904                         j = 0; ++row;
5905                         printf("\n0x00%x0  ", row);
5906                 }
5907                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5908                 printf("%04x ", eeprom_data);
5909         }
5910         printf("\n");
5911 }
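
/*
** The dump prints eight 16-bit words per row; illustrative output
** (the values shown are examples only):
**
**      Interface EEPROM Dump:
**      Offset
**      0x0000  8086 1521 ffff ffff ffff ffff ffff ffff
**      0x0010  ...
*/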
5912
5913 static void
5914 igb_set_sysctl_value(struct adapter *adapter, const char *name,
5915         const char *description, int *limit, int value)
5916 {
5917         *limit = value;
5918         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5919             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5920             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5921 }
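
/*
** Typical use from attach, for example (the variable and default shown
** here are illustrative, not necessarily the driver's exact registration):
**
**      igb_set_sysctl_value(adapter, "rx_processing_limit",
**          "max number of rx packets to process",
**          &adapter->rx_process_limit, 100);
*/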
5922
5923 /*
5924 ** Set flow control using sysctl:
5925 ** Flow control values:
5926 **      0 - off
5927 **      1 - rx pause
5928 **      2 - tx pause
5929 **      3 - full
5930 */
5931 static int
5932 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5933 {
5934         struct adapter  *adapter = (struct adapter *) arg1;
5935         int             error;
5936         int             input = adapter->fc; /* current setting (was a shared static) */
5937
5938         error = sysctl_handle_int(oidp, &input, 0, req);
5939
5940         if ((error) || (req->newptr == NULL))
5941                 return (error);
5942
5943         switch (input) {
5944                 case e1000_fc_rx_pause:
5945                 case e1000_fc_tx_pause:
5946                 case e1000_fc_full:
5947                 case e1000_fc_none:
5948                         adapter->hw.fc.requested_mode = input;
5949                         adapter->fc = input;
5950                         break;
5951                 default:
5952                         /* Do nothing */
5953                         return (error);
5954         }
5955
5956         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5957         e1000_force_mac_fc(&adapter->hw);
5958         return (error);
5959 }
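
/*
** Usage sketch, assuming this handler is registered as "fc" at attach
** (device unit 0):
**
**      # sysctl dev.igb.0.fc=3         (request full flow control)
**      # sysctl dev.igb.0.fc=0         (disable flow control)
*/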
5960
5961 /*
5962 ** Manage DMA Coalesce:
5963 ** Control values:
5964 **      0/1 - off/on
5965 **      Other legal timer values are 250, 500,
5966 **      and 1000 through 10000 in steps of 1000
5967 */
5968 static int
5969 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5970 {
5971         struct adapter *adapter = (struct adapter *) arg1;
5972         int             error;
5973
5974         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5975
5976         if ((error) || (req->newptr == NULL))
5977                 return (error);
5978
5979         switch (adapter->dmac) {
5980                 case 0:
5981                         /* Disabling */
5982                         break;
5983                 case 1: /* Just enable and use default */
5984                         adapter->dmac = 1000;
5985                         break;
5986                 case 250:
5987                 case 500:
5988                 case 1000:
5989                 case 2000:
5990                 case 3000:
5991                 case 4000:
5992                 case 5000:
5993                 case 6000:
5994                 case 7000:
5995                 case 8000:
5996                 case 9000:
5997                 case 10000:
5998                         /* Legal values - allow */
5999                         break;
6000                 default:
6001                         /* Do nothing, illegal value */
6002                         adapter->dmac = 0;
6003                         return (error);
6004         }
6005         /* Reinit the interface */
6006         igb_init(adapter);
6007         return (error);
6008 }
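
/*
** Usage sketch, assuming this handler is registered as "dmac" at attach:
**
**      # sysctl dev.igb.0.dmac=1       (enable, maps to the 1000 default)
**      # sysctl dev.igb.0.dmac=250     (explicit legal timer value)
**      # sysctl dev.igb.0.dmac=0       (disable)
*/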
6009
6010 /*
6011 ** Manage Energy Efficient Ethernet:
6012 ** Control values:
6013 **     0 - EEE enabled, 1 - EEE disabled
6014 */
6015 static int
6016 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6017 {
6018         struct adapter  *adapter = (struct adapter *) arg1;
6019         int             error, value;
6020
6021         value = adapter->hw.dev_spec._82575.eee_disable;
6022         error = sysctl_handle_int(oidp, &value, 0, req);
6023         if (error || req->newptr == NULL)
6024                 return (error);
6025         IGB_CORE_LOCK(adapter);
6026         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6027         igb_init_locked(adapter);
6028         IGB_CORE_UNLOCK(adapter);
6029         return (0);
6030 }
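
/*
** Usage sketch, assuming this handler is registered as "eee_disabled"
** at attach:
**
**      # sysctl dev.igb.0.eee_disabled=1       (turn EEE off)
**      # sysctl dev.igb.0.eee_disabled=0       (turn EEE back on)
*/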