1 /******************************************************************************
2
3   Copyright (c) 2001-2013, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #include "opt_inet.h"
37 #include "opt_inet6.h"
38
39 #ifdef HAVE_KERNEL_OPTION_HEADERS
40 #include "opt_device_polling.h"
41 #include "opt_altq.h"
42 #endif
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #ifndef IGB_LEGACY_TX
47 #include <sys/buf_ring.h>
48 #endif
49 #include <sys/bus.h>
50 #include <sys/endian.h>
51 #include <sys/kernel.h>
52 #include <sys/kthread.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 #include <sys/module.h>
56 #include <sys/rman.h>
57 #include <sys/socket.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/taskqueue.h>
61 #include <sys/eventhandler.h>
62 #include <sys/pcpu.h>
63 #include <sys/smp.h>
64 #include <machine/smp.h>
65 #include <machine/bus.h>
66 #include <machine/resource.h>
67
68 #include <net/bpf.h>
69 #include <net/ethernet.h>
70 #include <net/if.h>
71 #include <net/if_arp.h>
72 #include <net/if_dl.h>
73 #include <net/if_media.h>
74
75 #include <net/if_types.h>
76 #include <net/if_vlan_var.h>
77
78 #include <netinet/in_systm.h>
79 #include <netinet/in.h>
80 #include <netinet/if_ether.h>
81 #include <netinet/ip.h>
82 #include <netinet/ip6.h>
83 #include <netinet/tcp.h>
84 #include <netinet/tcp_lro.h>
85 #include <netinet/udp.h>
86
87 #include <machine/in_cksum.h>
88 #include <dev/led/led.h>
89 #include <dev/pci/pcivar.h>
90 #include <dev/pci/pcireg.h>
91
92 #include "e1000_api.h"
93 #include "e1000_82575.h"
94 #include "if_igb.h"
95
96 /*********************************************************************
97  *  Set this to one to display debug statistics
98  *********************************************************************/
99 int     igb_display_debug_stats = 0;
100
101 /*********************************************************************
102  *  Driver version:
103  *********************************************************************/
104 char igb_driver_version[] = "version - 2.3.10";
105
106
107 /*********************************************************************
108  *  PCI Device ID Table
109  *
110  *  Used by probe to select devices to load on
111  *  Last field stores an index into e1000_strings
112  *  Last entry must be all 0s
113  *
114  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
115  *********************************************************************/
116
117 static igb_vendor_info_t igb_vendor_info_array[] =
118 {
119         { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
128         { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
130                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
132                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
134                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
137         { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
138         { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
139         { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
140         { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
141                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
142         { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
143                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
144         { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
145         { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
146         { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
147         { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
148                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
149         { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
150         { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
151         { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
152         { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
153         { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
154         { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
155         { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
156         { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
157                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
158         { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
159         { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
160         { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
161         { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
162         /* required last entry */
163         { 0, 0, 0, 0, 0}
164 };
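/*
 * An illustrative sketch of the table layout described above, i.e.
 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }:
 * a new adapter entry would be placed before the all-zero terminator,
 * for example (the device ID macro shown here is hypothetical, not a
 * real e1000 definition):
 *
 *      { 0x8086, E1000_DEV_ID_EXAMPLE_NEW_PART, PCI_ANY_ID, PCI_ANY_ID, 0},
 */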
165
166 /*********************************************************************
167  *  Table of branding strings for all supported NICs.
168  *********************************************************************/
169
170 static char *igb_strings[] = {
171         "Intel(R) PRO/1000 Network Connection"
172 };
173
174 /*********************************************************************
175  *  Function prototypes
176  *********************************************************************/
177 static int      igb_probe(device_t);
178 static int      igb_attach(device_t);
179 static int      igb_detach(device_t);
180 static int      igb_shutdown(device_t);
181 static int      igb_suspend(device_t);
182 static int      igb_resume(device_t);
183 #ifndef IGB_LEGACY_TX
184 static int      igb_mq_start(struct ifnet *, struct mbuf *);
185 static int      igb_mq_start_locked(struct ifnet *, struct tx_ring *);
186 static void     igb_qflush(struct ifnet *);
187 static void     igb_deferred_mq_start(void *, int);
188 #else
189 static void     igb_start(struct ifnet *);
190 static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
191 #endif
192 static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
193 static void     igb_init(void *);
194 static void     igb_init_locked(struct adapter *);
195 static void     igb_stop(void *);
196 static void     igb_media_status(struct ifnet *, struct ifmediareq *);
197 static int      igb_media_change(struct ifnet *);
198 static void     igb_identify_hardware(struct adapter *);
199 static int      igb_allocate_pci_resources(struct adapter *);
200 static int      igb_allocate_msix(struct adapter *);
201 static int      igb_allocate_legacy(struct adapter *);
202 static int      igb_setup_msix(struct adapter *);
203 static void     igb_free_pci_resources(struct adapter *);
204 static void     igb_local_timer(void *);
205 static void     igb_reset(struct adapter *);
206 static int      igb_setup_interface(device_t, struct adapter *);
207 static int      igb_allocate_queues(struct adapter *);
208 static void     igb_configure_queues(struct adapter *);
209
210 static int      igb_allocate_transmit_buffers(struct tx_ring *);
211 static void     igb_setup_transmit_structures(struct adapter *);
212 static void     igb_setup_transmit_ring(struct tx_ring *);
213 static void     igb_initialize_transmit_units(struct adapter *);
214 static void     igb_free_transmit_structures(struct adapter *);
215 static void     igb_free_transmit_buffers(struct tx_ring *);
216
217 static int      igb_allocate_receive_buffers(struct rx_ring *);
218 static int      igb_setup_receive_structures(struct adapter *);
219 static int      igb_setup_receive_ring(struct rx_ring *);
220 static void     igb_initialize_receive_units(struct adapter *);
221 static void     igb_free_receive_structures(struct adapter *);
222 static void     igb_free_receive_buffers(struct rx_ring *);
223 static void     igb_free_receive_ring(struct rx_ring *);
224
225 static void     igb_enable_intr(struct adapter *);
226 static void     igb_disable_intr(struct adapter *);
227 static void     igb_update_stats_counters(struct adapter *);
228 static bool     igb_txeof(struct tx_ring *);
229
230 static __inline void igb_rx_discard(struct rx_ring *, int);
231 static __inline void igb_rx_input(struct rx_ring *,
232                     struct ifnet *, struct mbuf *, u32);
233
234 static bool     igb_rxeof(struct igb_queue *, int, int *);
235 static void     igb_rx_checksum(u32, struct mbuf *, u32);
236 static bool     igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
237 static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, int,
238                     struct ip *, struct tcphdr *);
239 static void     igb_set_promisc(struct adapter *);
240 static void     igb_disable_promisc(struct adapter *);
241 static void     igb_set_multi(struct adapter *);
242 static void     igb_update_link_status(struct adapter *);
243 static void     igb_refresh_mbufs(struct rx_ring *, int);
244
245 static void     igb_register_vlan(void *, struct ifnet *, u16);
246 static void     igb_unregister_vlan(void *, struct ifnet *, u16);
247 static void     igb_setup_vlan_hw_support(struct adapter *);
248
249 static int      igb_xmit(struct tx_ring *, struct mbuf **);
250 static int      igb_dma_malloc(struct adapter *, bus_size_t,
251                     struct igb_dma_alloc *, int);
252 static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
253 static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
254 static void     igb_print_nvm_info(struct adapter *);
255 static int      igb_is_valid_ether_addr(u8 *);
256 static void     igb_add_hw_stats(struct adapter *);
257
258 static void     igb_vf_init_stats(struct adapter *);
259 static void     igb_update_vf_stats_counters(struct adapter *);
260
261 /* Management and WOL Support */
262 static void     igb_init_manageability(struct adapter *);
263 static void     igb_release_manageability(struct adapter *);
264 static void     igb_get_hw_control(struct adapter *);
265 static void     igb_release_hw_control(struct adapter *);
266 static void     igb_enable_wakeup(device_t);
267 static void     igb_led_func(void *, int);
268
269 static int      igb_irq_fast(void *);
270 static void     igb_msix_que(void *);
271 static void     igb_msix_link(void *);
272 static void     igb_handle_que(void *context, int pending);
273 static void     igb_handle_link(void *context, int pending);
274 static void     igb_handle_link_locked(struct adapter *);
275
276 static void     igb_set_sysctl_value(struct adapter *, const char *,
277                     const char *, int *, int);
278 static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
279 static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
280 static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
281
282 #ifdef DEVICE_POLLING
283 static poll_handler_t igb_poll;
284 #endif /* DEVICE_POLLING */
285
286 /*********************************************************************
287  *  FreeBSD Device Interface Entry Points
288  *********************************************************************/
289
290 static device_method_t igb_methods[] = {
291         /* Device interface */
292         DEVMETHOD(device_probe, igb_probe),
293         DEVMETHOD(device_attach, igb_attach),
294         DEVMETHOD(device_detach, igb_detach),
295         DEVMETHOD(device_shutdown, igb_shutdown),
296         DEVMETHOD(device_suspend, igb_suspend),
297         DEVMETHOD(device_resume, igb_resume),
298         DEVMETHOD_END
299 };
300
301 static driver_t igb_driver = {
302         "igb", igb_methods, sizeof(struct adapter),
303 };
304
305 static devclass_t igb_devclass;
306 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
307 MODULE_DEPEND(igb, pci, 1, 1, 1);
308 MODULE_DEPEND(igb, ether, 1, 1, 1);
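/*
 * A usage sketch, hedged: given the DRIVER_MODULE/MODULE_DEPEND declarations
 * above, the driver can also be loaded as a kernel module, typically with
 * "kldload if_igb" at runtime or if_igb_load="YES" in /boot/loader.conf
 * (module name assumed from the usual e1000 module build glue).
 */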
309
310 /*********************************************************************
311  *  Tunable default values.
312  *********************************************************************/
313
314 static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
315
316 /* Descriptor defaults */
317 static int igb_rxd = IGB_DEFAULT_RXD;
318 static int igb_txd = IGB_DEFAULT_TXD;
319 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
320 TUNABLE_INT("hw.igb.txd", &igb_txd);
321 SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
322     "Number of receive descriptors per queue");
323 SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
324     "Number of transmit descriptors per queue");
325
326 /*
327 ** AIM: Adaptive Interrupt Moderation,
328 ** which means that the interrupt rate
329 ** is varied over time based on the
330 ** traffic seen on that interrupt vector.
331 */
332 static int igb_enable_aim = TRUE;
333 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
334 SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
335     "Enable adaptive interrupt moderation");
336
337 /*
338  * MSIX should be the default for best performance,
339  * but this allows it to be forced off for testing.
340  */         
341 static int igb_enable_msix = 1;
342 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
343 SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
344     "Enable MSI-X interrupts");
345
346 /*
347 ** Tunable interrupt rate
348 */
349 static int igb_max_interrupt_rate = 8000;
350 TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
351 SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
352     &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
353
354 #if __FreeBSD_version >= 800000
355 /*
356 ** Tunable number of buffers in the buf-ring (drbr_xxx)
357 */
358 static int igb_buf_ring_size = IGB_BR_SIZE;
359 TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
360 SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
361     &igb_buf_ring_size, 0, "Size of the bufring");
362 #endif
363
364 /*
365 ** Header split causes the packet header to
366 ** be DMA'd to a separate mbuf from the payload.
367 ** This can have memory alignment benefits, and
368 ** another plus is that small packets often fit
369 ** into the header and thus use no cluster. It is
370 ** a very workload-dependent feature.
371 */
372 static int igb_header_split = FALSE;
373 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
374 SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
375     "Enable receive mbuf header split");
376
377 /*
378 ** This will autoconfigure based on the
379 ** number of CPUs and max supported
380 ** MSIX messages if left at 0.
381 */
382 static int igb_num_queues = 0;
383 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
384 SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
385     "Number of queues to configure, 0 indicates autoconfigure");
386
387 /*
388 ** Global variable to store last used CPU when binding queues
389 ** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
390 ** queue is bound to a cpu.
391 */
392 static int igb_last_bind_cpu = -1;
393
394 /* How many packets rxeof tries to clean at a time */
395 static int igb_rx_process_limit = 100;
396 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
397 SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
398     &igb_rx_process_limit, 0,
399     "Maximum number of received packets to process at a time, -1 means unlimited");
400
401 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
402 #include <dev/netmap/if_igb_netmap.h>
403 #endif /* DEV_NETMAP */
404 /*********************************************************************
405  *  Device identification routine
406  *
407  *  igb_probe determines if the driver should be loaded on
408  *  adapter based on PCI vendor/device id of the adapter.
409  *
410  *  return BUS_PROBE_DEFAULT on success, positive on failure
411  *********************************************************************/
412
413 static int
414 igb_probe(device_t dev)
415 {
416         char            adapter_name[60];
417         uint16_t        pci_vendor_id = 0;
418         uint16_t        pci_device_id = 0;
419         uint16_t        pci_subvendor_id = 0;
420         uint16_t        pci_subdevice_id = 0;
421         igb_vendor_info_t *ent;
422
423         INIT_DEBUGOUT("igb_probe: begin");
424
425         pci_vendor_id = pci_get_vendor(dev);
426         if (pci_vendor_id != IGB_VENDOR_ID)
427                 return (ENXIO);
428
429         pci_device_id = pci_get_device(dev);
430         pci_subvendor_id = pci_get_subvendor(dev);
431         pci_subdevice_id = pci_get_subdevice(dev);
432
433         ent = igb_vendor_info_array;
434         while (ent->vendor_id != 0) {
435                 if ((pci_vendor_id == ent->vendor_id) &&
436                     (pci_device_id == ent->device_id) &&
437
438                     ((pci_subvendor_id == ent->subvendor_id) ||
439                     (ent->subvendor_id == PCI_ANY_ID)) &&
440
441                     ((pci_subdevice_id == ent->subdevice_id) ||
442                     (ent->subdevice_id == PCI_ANY_ID))) {
443                         sprintf(adapter_name, "%s %s",
444                                 igb_strings[ent->index],
445                                 igb_driver_version);
446                         device_set_desc_copy(dev, adapter_name);
447                         return (BUS_PROBE_DEFAULT);
448                 }
449                 ent++;
450         }
451
452         return (ENXIO);
453 }
454
455 /*********************************************************************
456  *  Device initialization routine
457  *
458  *  The attach entry point is called when the driver is being loaded.
459  *  This routine identifies the type of hardware, allocates all resources
460  *  and initializes the hardware.
461  *
462  *  return 0 on success, positive on failure
463  *********************************************************************/
464
465 static int
466 igb_attach(device_t dev)
467 {
468         struct adapter  *adapter;
469         int             error = 0;
470         u16             eeprom_data;
471
472         INIT_DEBUGOUT("igb_attach: begin");
473
474         if (resource_disabled("igb", device_get_unit(dev))) {
475                 device_printf(dev, "Disabled by device hint\n");
476                 return (ENXIO);
477         }
478
479         adapter = device_get_softc(dev);
480         adapter->dev = adapter->osdep.dev = dev;
481         IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
482
483         /* SYSCTL stuff */
484         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
485             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
486             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
487             igb_sysctl_nvm_info, "I", "NVM Information");
488
489         igb_set_sysctl_value(adapter, "enable_aim",
490             "Interrupt Moderation", &adapter->enable_aim,
491             igb_enable_aim);
492
493         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
494             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
495             OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
496             adapter, 0, igb_set_flowcntl, "I", "Flow Control");
497
498         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
499
500         /* Determine hardware and mac info */
501         igb_identify_hardware(adapter);
502
503         /* Setup PCI resources */
504         if (igb_allocate_pci_resources(adapter)) {
505                 device_printf(dev, "Allocation of PCI resources failed\n");
506                 error = ENXIO;
507                 goto err_pci;
508         }
509
510         /* Do Shared Code initialization */
511         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
512                 device_printf(dev, "Setup of Shared code failed\n");
513                 error = ENXIO;
514                 goto err_pci;
515         }
516
517         e1000_get_bus_info(&adapter->hw);
518
519         /* Sysctl for limiting the amount of work done in the taskqueue */
520         igb_set_sysctl_value(adapter, "rx_processing_limit",
521             "max number of rx packets to process",
522             &adapter->rx_process_limit, igb_rx_process_limit);
523
524         /*
525          * Validate number of transmit and receive descriptors. It
526          * must not exceed hardware maximum, and must be a multiple
527          * of IGB_DBA_ALIGN.
528          */
529         if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
530             (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
531                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
532                     IGB_DEFAULT_TXD, igb_txd);
533                 adapter->num_tx_desc = IGB_DEFAULT_TXD;
534         } else
535                 adapter->num_tx_desc = igb_txd;
536         if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
537             (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
538                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
539                     IGB_DEFAULT_RXD, igb_rxd);
540                 adapter->num_rx_desc = IGB_DEFAULT_RXD;
541         } else
542                 adapter->num_rx_desc = igb_rxd;
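        /*
         * A worked example of the validation above, assuming the usual
         * definitions sizeof(struct e1000_tx_desc) == 16 and
         * IGB_DBA_ALIGN == 128 from if_igb.h (both assumptions; check the
         * headers): hw.igb.txd=1024 gives 1024 * 16 = 16384 bytes, a
         * multiple of 128, so it is accepted, while hw.igb.txd=1020 gives
         * 16320 bytes, which is not a multiple of 128, so the driver falls
         * back to IGB_DEFAULT_TXD and prints the warning above.
         */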
543
544         adapter->hw.mac.autoneg = DO_AUTO_NEG;
545         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
546         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
547
548         /* Copper options */
549         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
550                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
551                 adapter->hw.phy.disable_polarity_correction = FALSE;
552                 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
553         }
554
555         /*
556          * Set the frame limits assuming
557          * standard ethernet sized frames.
558          */
559         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
560         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
561
562         /*
563         ** Allocate and Setup Queues
564         */
565         if (igb_allocate_queues(adapter)) {
566                 error = ENOMEM;
567                 goto err_pci;
568         }
569
570         /* Allocate the appropriate stats memory */
571         if (adapter->vf_ifp) {
572                 adapter->stats =
573                     (struct e1000_vf_stats *)malloc(sizeof \
574                     (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
575                 igb_vf_init_stats(adapter);
576         } else
577                 adapter->stats =
578                     (struct e1000_hw_stats *)malloc(sizeof \
579                     (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
580         if (adapter->stats == NULL) {
581                 device_printf(dev, "Can not allocate stats memory\n");
582                 error = ENOMEM;
583                 goto err_late;
584         }
585
586         /* Allocate multicast array memory. */
587         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
588             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
589         if (adapter->mta == NULL) {
590                 device_printf(dev, "Can not allocate multicast setup array\n");
591                 error = ENOMEM;
592                 goto err_late;
593         }
594
595         /* Some adapter-specific advanced features */
596         if (adapter->hw.mac.type >= e1000_i350) {
597                 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
598                     SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
599                     OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
600                     adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
601                 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
602                     SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
603                     OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
604                     adapter, 0, igb_sysctl_eee, "I",
605                     "Disable Energy Efficient Ethernet");
606                 if (adapter->hw.phy.media_type == e1000_media_type_copper)
607                         e1000_set_eee_i350(&adapter->hw);
608         }
609
610         /*
611         ** Start from a known state; this is
612         ** important when reading the NVM and
613         ** MAC address from it.
614         */
615         e1000_reset_hw(&adapter->hw);
616
617         /* Make sure we have a good EEPROM before we read from it */
618         if (((adapter->hw.mac.type != e1000_i210) &&
619             (adapter->hw.mac.type != e1000_i211)) &&
620             (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
621                 /*
622                 ** Some PCI-E parts fail the first check due to
623                 ** the link being in a sleep state; call it again.
624                 ** If it fails a second time it is a real issue.
625                 */
626                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
627                         device_printf(dev,
628                             "The EEPROM Checksum Is Not Valid\n");
629                         error = EIO;
630                         goto err_late;
631                 }
632         }
633
634         /*
635         ** Copy the permanent MAC address out of the EEPROM
636         */
637         if (e1000_read_mac_addr(&adapter->hw) < 0) {
638                 device_printf(dev, "EEPROM read error while reading MAC"
639                     " address\n");
640                 error = EIO;
641                 goto err_late;
642         }
643         /* Check its sanity */
644         if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
645                 device_printf(dev, "Invalid MAC address\n");
646                 error = EIO;
647                 goto err_late;
648         }
649
650         /* Setup OS specific network interface */
651         if (igb_setup_interface(dev, adapter) != 0)
652                 goto err_late;
653
654         /* Now get a good starting state */
655         igb_reset(adapter);
656
657         /* Initialize statistics */
658         igb_update_stats_counters(adapter);
659
660         adapter->hw.mac.get_link_status = 1;
661         igb_update_link_status(adapter);
662
663         /* Indicate SOL/IDER usage */
664         if (e1000_check_reset_block(&adapter->hw))
665                 device_printf(dev,
666                     "PHY reset is blocked due to SOL/IDER session.\n");
667
668         /* Determine if we have to control management hardware */
669         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
670
671         /*
672          * Setup Wake-on-Lan
673          */
674         /* APME bit in EEPROM is mapped to WUC.APME */
675         eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
676         if (eeprom_data)
677                 adapter->wol = E1000_WUFC_MAG;
678
679         /* Register for VLAN events */
680         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
681              igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
682         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
683              igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
684
685         igb_add_hw_stats(adapter);
686
687         /* Tell the stack that the interface is not active */
688         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
689         adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
690
691         adapter->led_dev = led_create(igb_led_func, adapter,
692             device_get_nameunit(dev));
693
694         /* 
695         ** Configure Interrupts
696         */
697         if ((adapter->msix > 1) && (igb_enable_msix))
698                 error = igb_allocate_msix(adapter);
699         else /* MSI or Legacy */
700                 error = igb_allocate_legacy(adapter);
701         if (error)
702                 goto err_late;
703
704 #ifdef DEV_NETMAP
705         igb_netmap_attach(adapter);
706 #endif /* DEV_NETMAP */
707         INIT_DEBUGOUT("igb_attach: end");
708
709         return (0);
710
711 err_late:
712         igb_detach(dev);
713         igb_free_transmit_structures(adapter);
714         igb_free_receive_structures(adapter);
715         igb_release_hw_control(adapter);
716 err_pci:
717         igb_free_pci_resources(adapter);
718         if (adapter->ifp != NULL)
719                 if_free(adapter->ifp);
720         free(adapter->mta, M_DEVBUF);
721         IGB_CORE_LOCK_DESTROY(adapter);
722
723         return (error);
724 }
725
726 /*********************************************************************
727  *  Device removal routine
728  *
729  *  The detach entry point is called when the driver is being removed.
730  *  This routine stops the adapter and deallocates all the resources
731  *  that were allocated for driver operation.
732  *
733  *  return 0 on success, positive on failure
734  *********************************************************************/
735
736 static int
737 igb_detach(device_t dev)
738 {
739         struct adapter  *adapter = device_get_softc(dev);
740         struct ifnet    *ifp = adapter->ifp;
741
742         INIT_DEBUGOUT("igb_detach: begin");
743
744         /* Make sure VLANS are not using driver */
745         if (adapter->ifp->if_vlantrunk != NULL) {
746                 device_printf(dev,"Vlan in use, detach first\n");
747                 return (EBUSY);
748         }
749
750         ether_ifdetach(adapter->ifp);
751
752         if (adapter->led_dev != NULL)
753                 led_destroy(adapter->led_dev);
754
755 #ifdef DEVICE_POLLING
756         if (ifp->if_capenable & IFCAP_POLLING)
757                 ether_poll_deregister(ifp);
758 #endif
759
760         IGB_CORE_LOCK(adapter);
761         adapter->in_detach = 1;
762         igb_stop(adapter);
763         IGB_CORE_UNLOCK(adapter);
764
765         e1000_phy_hw_reset(&adapter->hw);
766
767         /* Give control back to firmware */
768         igb_release_manageability(adapter);
769         igb_release_hw_control(adapter);
770
771         if (adapter->wol) {
772                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
773                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
774                 igb_enable_wakeup(dev);
775         }
776
777         /* Unregister VLAN events */
778         if (adapter->vlan_attach != NULL)
779                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
780         if (adapter->vlan_detach != NULL)
781                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
782
783         callout_drain(&adapter->timer);
784
785 #ifdef DEV_NETMAP
786         netmap_detach(adapter->ifp);
787 #endif /* DEV_NETMAP */
788         igb_free_pci_resources(adapter);
789         bus_generic_detach(dev);
790         if_free(ifp);
791
792         igb_free_transmit_structures(adapter);
793         igb_free_receive_structures(adapter);
794         if (adapter->mta != NULL)
795                 free(adapter->mta, M_DEVBUF);
796
797         IGB_CORE_LOCK_DESTROY(adapter);
798
799         return (0);
800 }
801
802 /*********************************************************************
803  *
804  *  Shutdown entry point
805  *
806  **********************************************************************/
807
808 static int
809 igb_shutdown(device_t dev)
810 {
811         return igb_suspend(dev);
812 }
813
814 /*
815  * Suspend/resume device methods.
816  */
817 static int
818 igb_suspend(device_t dev)
819 {
820         struct adapter *adapter = device_get_softc(dev);
821
822         IGB_CORE_LOCK(adapter);
823
824         igb_stop(adapter);
825
826         igb_release_manageability(adapter);
827         igb_release_hw_control(adapter);
828
829         if (adapter->wol) {
830                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
831                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
832                 igb_enable_wakeup(dev);
833         }
834
835         IGB_CORE_UNLOCK(adapter);
836
837         return bus_generic_suspend(dev);
838 }
839
840 static int
841 igb_resume(device_t dev)
842 {
843         struct adapter *adapter = device_get_softc(dev);
844         struct tx_ring  *txr = adapter->tx_rings;
845         struct ifnet *ifp = adapter->ifp;
846
847         IGB_CORE_LOCK(adapter);
848         igb_init_locked(adapter);
849         igb_init_manageability(adapter);
850
851         if ((ifp->if_flags & IFF_UP) &&
852             (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
853                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
854                         IGB_TX_LOCK(txr);
855 #ifndef IGB_LEGACY_TX
856                         /* Process the stack queue only if not depleted */
857                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
858                             !drbr_empty(ifp, txr->br))
859                                 igb_mq_start_locked(ifp, txr);
860 #else
861                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
862                                 igb_start_locked(txr, ifp);
863 #endif
864                         IGB_TX_UNLOCK(txr);
865                 }
866         }
867         IGB_CORE_UNLOCK(adapter);
868
869         return bus_generic_resume(dev);
870 }
871
872
873 #ifdef IGB_LEGACY_TX
874
875 /*********************************************************************
876  *  Transmit entry point
877  *
878  *  igb_start is called by the stack to initiate a transmit.
879  *  The driver will remain in this routine as long as there are
880  *  packets to transmit and transmit resources are available.
881  *  In case resources are not available stack is notified and
882  *  the packet is requeued.
883  **********************************************************************/
884
885 static void
886 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
887 {
888         struct adapter  *adapter = ifp->if_softc;
889         struct mbuf     *m_head;
890
891         IGB_TX_LOCK_ASSERT(txr);
892
893         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
894             IFF_DRV_RUNNING)
895                 return;
896         if (!adapter->link_active)
897                 return;
898
899         /* Call cleanup if number of TX descriptors low */
900         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
901                 igb_txeof(txr);
902
903         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
904                 if (txr->tx_avail <= IGB_MAX_SCATTER) {
905                         txr->queue_status |= IGB_QUEUE_DEPLETED;
906                         break;
907                 }
908                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
909                 if (m_head == NULL)
910                         break;
911                 /*
912                  *  Encapsulation can modify our pointer, and/or make it
913                  *  NULL on failure.  In that event, we can't requeue.
914                  */
915                 if (igb_xmit(txr, &m_head)) {
916                         if (m_head != NULL)
917                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
918                         if (txr->tx_avail <= IGB_MAX_SCATTER)
919                                 txr->queue_status |= IGB_QUEUE_DEPLETED;
920                         break;
921                 }
922
923                 /* Send a copy of the frame to the BPF listener */
924                 ETHER_BPF_MTAP(ifp, m_head);
925
926                 /* Set watchdog on */
927                 txr->watchdog_time = ticks;
928                 txr->queue_status |= IGB_QUEUE_WORKING;
929         }
930 }
931  
932 /*
933  * Legacy TX driver routine, called from the
934  * stack, always uses tx[0], and spins for it.
935  * Should not be used with multiqueue TX.
936  */
937 static void
938 igb_start(struct ifnet *ifp)
939 {
940         struct adapter  *adapter = ifp->if_softc;
941         struct tx_ring  *txr = adapter->tx_rings;
942
943         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
944                 IGB_TX_LOCK(txr);
945                 igb_start_locked(txr, ifp);
946                 IGB_TX_UNLOCK(txr);
947         }
948         return;
949 }
950
951 #else /* ~IGB_LEGACY_TX */
952
953 /*
954 ** Multiqueue Transmit Entry:
955 **  quick turnaround to the stack
956 **
957 */
958 static int
959 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
960 {
961         struct adapter          *adapter = ifp->if_softc;
962         struct igb_queue        *que;
963         struct tx_ring          *txr;
964         int                     i, err = 0;
965
966         /* Which queue to use */
967         if ((m->m_flags & M_FLOWID) != 0)
968                 i = m->m_pkthdr.flowid % adapter->num_queues;
969         else
970                 i = curcpu % adapter->num_queues;
971         txr = &adapter->tx_rings[i];
972         que = &adapter->queues[i];
973
974         err = drbr_enqueue(ifp, txr->br, m);
975         taskqueue_enqueue(que->tq, &txr->txq_task);
976
977         return (err);
978 }
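/*
 * A worked example of the queue selection above: with
 * adapter->num_queues == 4, a packet carrying M_FLOWID with
 * m_pkthdr.flowid == 7 maps to ring 7 % 4 == 3, while a packet without a
 * flowid sent from CPU 5 maps to ring 5 % 4 == 1.  The modulo keeps a given
 * flow (or CPU) on one TX ring, preserving per-flow ordering across rings.
 */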
979
980 static int
981 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
982 {
983         struct adapter  *adapter = txr->adapter;
984         struct mbuf     *next;
985         int             err = 0, enq;
986
987         IGB_TX_LOCK_ASSERT(txr);
988
989         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
990             adapter->link_active == 0)
991                 return (ENETDOWN);
992
993         enq = 0;
994
995         /* Process the queue */
996         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
997                 if ((err = igb_xmit(txr, &next)) != 0) {
998                         if (next == NULL) {
999                                 /* It was freed, move forward */
1000                                 drbr_advance(ifp, txr->br);
1001                         } else {
1002                                 /* 
1003                                  * Still have one left, it may not be
1004                                  * the same since the transmit function
1005                                  * may have changed it.
1006                                  */
1007                                 drbr_putback(ifp, txr->br, next);
1008                         }
1009                         break;
1010                 }
1011                 drbr_advance(ifp, txr->br);
1012                 enq++;
1013                 ifp->if_obytes += next->m_pkthdr.len;
1014                 if (next->m_flags & M_MCAST)
1015                         ifp->if_omcasts++;
1016                 ETHER_BPF_MTAP(ifp, next);
1017                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1018                         break;
1019         }
1020         if (enq > 0) {
1021                 /* Set the watchdog */
1022                 txr->queue_status |= IGB_QUEUE_WORKING;
1023                 txr->watchdog_time = ticks;
1024         }
1025         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1026                 igb_txeof(txr);
1027         if (txr->tx_avail <= IGB_MAX_SCATTER)
1028                 txr->queue_status |= IGB_QUEUE_DEPLETED;
1029         return (err);
1030 }
1031
1032 /*
1033  * Called from a taskqueue to drain queued transmit packets.
1034  */
1035 static void
1036 igb_deferred_mq_start(void *arg, int pending)
1037 {
1038         struct tx_ring *txr = arg;
1039         struct adapter *adapter = txr->adapter;
1040         struct ifnet *ifp = adapter->ifp;
1041
1042         IGB_TX_LOCK(txr);
1043         if (!drbr_empty(ifp, txr->br))
1044                 igb_mq_start_locked(ifp, txr);
1045         IGB_TX_UNLOCK(txr);
1046 }
1047
1048 /*
1049 ** Flush all ring buffers
1050 */
1051 static void
1052 igb_qflush(struct ifnet *ifp)
1053 {
1054         struct adapter  *adapter = ifp->if_softc;
1055         struct tx_ring  *txr = adapter->tx_rings;
1056         struct mbuf     *m;
1057
1058         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1059                 IGB_TX_LOCK(txr);
1060                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1061                         m_freem(m);
1062                 IGB_TX_UNLOCK(txr);
1063         }
1064         if_qflush(ifp);
1065 }
1066 #endif /* ~IGB_LEGACY_TX */
1067
1068 /*********************************************************************
1069  *  Ioctl entry point
1070  *
1071  *  igb_ioctl is called when the user wants to configure the
1072  *  interface.
1073  *
1074  *  return 0 on success, positive on failure
1075  **********************************************************************/
1076
1077 static int
1078 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1079 {
1080         struct adapter  *adapter = ifp->if_softc;
1081         struct ifreq    *ifr = (struct ifreq *)data;
1082 #if defined(INET) || defined(INET6)
1083         struct ifaddr   *ifa = (struct ifaddr *)data;
1084 #endif
1085         bool            avoid_reset = FALSE;
1086         int             error = 0;
1087
1088         if (adapter->in_detach)
1089                 return (error);
1090
1091         switch (command) {
1092         case SIOCSIFADDR:
1093 #ifdef INET
1094                 if (ifa->ifa_addr->sa_family == AF_INET)
1095                         avoid_reset = TRUE;
1096 #endif
1097 #ifdef INET6
1098                 if (ifa->ifa_addr->sa_family == AF_INET6)
1099                         avoid_reset = TRUE;
1100 #endif
1101                 /*
1102                 ** Calling init results in link renegotiation,
1103                 ** so we avoid doing it when possible.
1104                 */
1105                 if (avoid_reset) {
1106                         ifp->if_flags |= IFF_UP;
1107                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1108                                 igb_init(adapter);
1109 #ifdef INET
1110                         if (!(ifp->if_flags & IFF_NOARP))
1111                                 arp_ifinit(ifp, ifa);
1112 #endif
1113                 } else
1114                         error = ether_ioctl(ifp, command, data);
1115                 break;
1116         case SIOCSIFMTU:
1117             {
1118                 int max_frame_size;
1119
1120                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1121
1122                 IGB_CORE_LOCK(adapter);
1123                 max_frame_size = 9234;
1124                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1125                     ETHER_CRC_LEN) {
1126                         IGB_CORE_UNLOCK(adapter);
1127                         error = EINVAL;
1128                         break;
1129                 }
1130
1131                 ifp->if_mtu = ifr->ifr_mtu;
1132                 adapter->max_frame_size =
1133                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1134                 igb_init_locked(adapter);
1135                 IGB_CORE_UNLOCK(adapter);
1136                 break;
1137             }
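            /*
             * A worked example of the check above: with max_frame_size fixed
             * at 9234, the largest MTU accepted is 9234 - ETHER_HDR_LEN (14)
             * - ETHER_CRC_LEN (4) = 9216, so "ifconfig igb0 mtu 9216"
             * succeeds while "ifconfig igb0 mtu 9217" returns EINVAL.
             */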
1138         case SIOCSIFFLAGS:
1139                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1140                     SIOCSIFFLAGS (Set Interface Flags)");
1141                 IGB_CORE_LOCK(adapter);
1142                 if (ifp->if_flags & IFF_UP) {
1143                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1144                                 if ((ifp->if_flags ^ adapter->if_flags) &
1145                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1146                                         igb_disable_promisc(adapter);
1147                                         igb_set_promisc(adapter);
1148                                 }
1149                         } else
1150                                 igb_init_locked(adapter);
1151                 } else
1152                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1153                                 igb_stop(adapter);
1154                 adapter->if_flags = ifp->if_flags;
1155                 IGB_CORE_UNLOCK(adapter);
1156                 break;
1157         case SIOCADDMULTI:
1158         case SIOCDELMULTI:
1159                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1160                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1161                         IGB_CORE_LOCK(adapter);
1162                         igb_disable_intr(adapter);
1163                         igb_set_multi(adapter);
1164 #ifdef DEVICE_POLLING
1165                         if (!(ifp->if_capenable & IFCAP_POLLING))
1166 #endif
1167                                 igb_enable_intr(adapter);
1168                         IGB_CORE_UNLOCK(adapter);
1169                 }
1170                 break;
1171         case SIOCSIFMEDIA:
1172                 /* Check SOL/IDER usage */
1173                 IGB_CORE_LOCK(adapter);
1174                 if (e1000_check_reset_block(&adapter->hw)) {
1175                         IGB_CORE_UNLOCK(adapter);
1176                         device_printf(adapter->dev, "Media change is"
1177                             " blocked due to SOL/IDER session.\n");
1178                         break;
1179                 }
1180                 IGB_CORE_UNLOCK(adapter);
1181         case SIOCGIFMEDIA:
1182                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1183                     SIOCxIFMEDIA (Get/Set Interface Media)");
1184                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1185                 break;
1186         case SIOCSIFCAP:
1187             {
1188                 int mask, reinit;
1189
1190                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1191                 reinit = 0;
1192                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1193 #ifdef DEVICE_POLLING
1194                 if (mask & IFCAP_POLLING) {
1195                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1196                                 error = ether_poll_register(igb_poll, ifp);
1197                                 if (error)
1198                                         return (error);
1199                                 IGB_CORE_LOCK(adapter);
1200                                 igb_disable_intr(adapter);
1201                                 ifp->if_capenable |= IFCAP_POLLING;
1202                                 IGB_CORE_UNLOCK(adapter);
1203                         } else {
1204                                 error = ether_poll_deregister(ifp);
1205                                 /* Enable interrupt even in error case */
1206                                 IGB_CORE_LOCK(adapter);
1207                                 igb_enable_intr(adapter);
1208                                 ifp->if_capenable &= ~IFCAP_POLLING;
1209                                 IGB_CORE_UNLOCK(adapter);
1210                         }
1211                 }
1212 #endif
1213                 if (mask & IFCAP_HWCSUM) {
1214                         ifp->if_capenable ^= IFCAP_HWCSUM;
1215                         reinit = 1;
1216                 }
1217                 if (mask & IFCAP_TSO4) {
1218                         ifp->if_capenable ^= IFCAP_TSO4;
1219                         reinit = 1;
1220                 }
1221                 if (mask & IFCAP_VLAN_HWTAGGING) {
1222                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1223                         reinit = 1;
1224                 }
1225                 if (mask & IFCAP_VLAN_HWFILTER) {
1226                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1227                         reinit = 1;
1228                 }
1229                 if (mask & IFCAP_VLAN_HWTSO) {
1230                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1231                         reinit = 1;
1232                 }
1233                 if (mask & IFCAP_LRO) {
1234                         ifp->if_capenable ^= IFCAP_LRO;
1235                         reinit = 1;
1236                 }
1237                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1238                         igb_init(adapter);
1239                 VLAN_CAPABILITIES(ifp);
1240                 break;
1241             }
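            /*
             * An illustrative sketch of the capability handling above: the
             * XOR yields the capability bits the request wants to flip.  A
             * hypothetical "ifconfig igb0 -txcsum" arrives with the
             * IFCAP_TXCSUM bit set in mask, which matches the IFCAP_HWCSUM
             * test; if_capenable is toggled and, since the interface is
             * running, reinit forces igb_init() so the new offload settings
             * are programmed into the hardware.
             */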
1242
1243         default:
1244                 error = ether_ioctl(ifp, command, data);
1245                 break;
1246         }
1247
1248         return (error);
1249 }
1250
1251
1252 /*********************************************************************
1253  *  Init entry point
1254  *
1255  *  This routine is used in two ways. It is used by the stack as
1256  *  init entry point in network interface structure. It is also used
1257  *  by the driver as a hw/sw initialization routine to get to a
1258  *  consistent state.
1259  *
1260  *  return 0 on success, positive on failure
1261  **********************************************************************/
1262
1263 static void
1264 igb_init_locked(struct adapter *adapter)
1265 {
1266         struct ifnet    *ifp = adapter->ifp;
1267         device_t        dev = adapter->dev;
1268
1269         INIT_DEBUGOUT("igb_init: begin");
1270
1271         IGB_CORE_LOCK_ASSERT(adapter);
1272
1273         igb_disable_intr(adapter);
1274         callout_stop(&adapter->timer);
1275
1276         /* Get the latest mac address, User can use a LAA */
1277         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1278               ETHER_ADDR_LEN);
1279
1280         /* Put the address into the Receive Address Array */
1281         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1282
1283         igb_reset(adapter);
1284         igb_update_link_status(adapter);
1285
1286         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1287
1288         /* Set hardware offload abilities */
1289         ifp->if_hwassist = 0;
1290         if (ifp->if_capenable & IFCAP_TXCSUM) {
1291                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1292 #if __FreeBSD_version >= 800000
1293                 if (adapter->hw.mac.type == e1000_82576)
1294                         ifp->if_hwassist |= CSUM_SCTP;
1295 #endif
1296         }
1297
1298         if (ifp->if_capenable & IFCAP_TSO4)
1299                 ifp->if_hwassist |= CSUM_TSO;
1300
1301         /* Configure for OS presence */
1302         igb_init_manageability(adapter);
1303
1304         /* Prepare transmit descriptors and buffers */
1305         igb_setup_transmit_structures(adapter);
1306         igb_initialize_transmit_units(adapter);
1307
1308         /* Setup Multicast table */
1309         igb_set_multi(adapter);
1310
1311         /*
1312         ** Figure out the desired mbuf pool
1313         ** for doing jumbo/packetsplit
1314         */
1315         if (adapter->max_frame_size <= 2048)
1316                 adapter->rx_mbuf_sz = MCLBYTES;
1317         else if (adapter->max_frame_size <= 4096)
1318                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1319         else
1320                 adapter->rx_mbuf_sz = MJUM9BYTES;
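        /*
         * A worked example of the selection above: at the default MTU of
         * 1500, max_frame_size is 1500 + 14 + 4 = 1518, so standard 2KB
         * clusters (MCLBYTES) are used; after "ifconfig igb0 mtu 9000" the
         * frame size becomes 9018, which exceeds 4096, so 9KB jumbo
         * clusters (MJUM9BYTES) are selected instead.
         */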
1321
1322         /* Prepare receive descriptors and buffers */
1323         if (igb_setup_receive_structures(adapter)) {
1324                 device_printf(dev, "Could not setup receive structures\n");
1325                 return;
1326         }
1327         igb_initialize_receive_units(adapter);
1328
1329         /* Enable VLAN support */
1330         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1331                 igb_setup_vlan_hw_support(adapter);
1332                                 
1333         /* Don't lose promiscuous settings */
1334         igb_set_promisc(adapter);
1335
1336         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1337         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1338
1339         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1340         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1341
1342         if (adapter->msix > 1) /* Set up queue routing */
1343                 igb_configure_queues(adapter);
1344
1345         /* this clears any pending interrupts */
1346         E1000_READ_REG(&adapter->hw, E1000_ICR);
1347 #ifdef DEVICE_POLLING
1348         /*
1349          * Only enable interrupts if we are not polling, make sure
1350          * they are off otherwise.
1351          */
1352         if (ifp->if_capenable & IFCAP_POLLING)
1353                 igb_disable_intr(adapter);
1354         else
1355 #endif /* DEVICE_POLLING */
1356         {
1357                 igb_enable_intr(adapter);
1358                 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1359         }
1360
1361         /* Set Energy Efficient Ethernet */
1362         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1363                 e1000_set_eee_i350(&adapter->hw);
1364 }
1365
1366 static void
1367 igb_init(void *arg)
1368 {
1369         struct adapter *adapter = arg;
1370
1371         IGB_CORE_LOCK(adapter);
1372         igb_init_locked(adapter);
1373         IGB_CORE_UNLOCK(adapter);
1374 }
1375
1376
1377 static void
1378 igb_handle_que(void *context, int pending)
1379 {
1380         struct igb_queue *que = context;
1381         struct adapter *adapter = que->adapter;
1382         struct tx_ring *txr = que->txr;
1383         struct ifnet    *ifp = adapter->ifp;
1384
1385         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1386                 bool    more;
1387
1388                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1389
1390                 IGB_TX_LOCK(txr);
1391                 igb_txeof(txr);
1392 #ifndef IGB_LEGACY_TX
1393                 /* Process the stack queue only if not depleted */
1394                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1395                     !drbr_empty(ifp, txr->br))
1396                         igb_mq_start_locked(ifp, txr);
1397 #else
1398                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1399                         igb_start_locked(txr, ifp);
1400 #endif
1401                 IGB_TX_UNLOCK(txr);
1402                 /* Do we need another? */
1403                 if (more) {
1404                         taskqueue_enqueue(que->tq, &que->que_task);
1405                         return;
1406                 }
1407         }
1408
1409 #ifdef DEVICE_POLLING
1410         if (ifp->if_capenable & IFCAP_POLLING)
1411                 return;
1412 #endif
1413         /* Reenable this interrupt */
1414         if (que->eims)
1415                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1416         else
1417                 igb_enable_intr(adapter);
1418 }
1419
1420 /* Deal with link in a sleepable context */
1421 static void
1422 igb_handle_link(void *context, int pending)
1423 {
1424         struct adapter *adapter = context;
1425
1426         IGB_CORE_LOCK(adapter);
1427         igb_handle_link_locked(adapter);
1428         IGB_CORE_UNLOCK(adapter);
1429 }
1430
1431 static void
1432 igb_handle_link_locked(struct adapter *adapter)
1433 {
1434         struct tx_ring  *txr = adapter->tx_rings;
1435         struct ifnet *ifp = adapter->ifp;
1436
1437         IGB_CORE_LOCK_ASSERT(adapter);
1438         adapter->hw.mac.get_link_status = 1;
1439         igb_update_link_status(adapter);
1440         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1441                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1442                         IGB_TX_LOCK(txr);
1443 #ifndef IGB_LEGACY_TX
1444                         /* Process the stack queue only if not depleted */
1445                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1446                             !drbr_empty(ifp, txr->br))
1447                                 igb_mq_start_locked(ifp, txr);
1448 #else
1449                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1450                                 igb_start_locked(txr, ifp);
1451 #endif
1452                         IGB_TX_UNLOCK(txr);
1453                 }
1454         }
1455 }
1456
1457 /*********************************************************************
1458  *
1459  *  MSI/Legacy Deferred
1460  *  Interrupt Service routine  
1461  *
1462  *********************************************************************/
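/*
** Note: this runs as a fast interrupt filter (hence the FILTER_*
** return values), so it must not sleep or take sleepable locks;
** the real work is deferred to the taskqueue it schedules.
*/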
1463 static int
1464 igb_irq_fast(void *arg)
1465 {
1466         struct adapter          *adapter = arg;
1467         struct igb_queue        *que = adapter->queues;
1468         u32                     reg_icr;
1469
1470
1471         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1472
1473         /* Hot eject?  */
1474         if (reg_icr == 0xffffffff)
1475                 return FILTER_STRAY;
1476
1477         /* Definitely not our interrupt.  */
1478         if (reg_icr == 0x0)
1479                 return FILTER_STRAY;
1480
1481         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1482                 return FILTER_STRAY;
1483
1484         /*
1485          * Mask interrupts until the taskqueue is finished running.  This is
1486          * cheap, just assume that it is needed.  This also works around the
1487          * MSI message reordering errata on certain systems.
1488          */
1489         igb_disable_intr(adapter);
1490         taskqueue_enqueue(que->tq, &que->que_task);
1491
1492         /* Link status change */
1493         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1494                 taskqueue_enqueue(que->tq, &adapter->link_task);
1495
1496         if (reg_icr & E1000_ICR_RXO)
1497                 adapter->rx_overruns++;
1498         return FILTER_HANDLED;
1499 }
1500
1501 #ifdef DEVICE_POLLING
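/*
** On FreeBSD 8 and later a polling handler returns the number of
** frames it processed, hence POLL_RETURN_COUNT(); older releases
** used a void handler.
*/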
1502 #if __FreeBSD_version >= 800000
1503 #define POLL_RETURN_COUNT(a) (a)
1504 static int
1505 #else
1506 #define POLL_RETURN_COUNT(a)
1507 static void
1508 #endif
1509 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1510 {
1511         struct adapter          *adapter = ifp->if_softc;
1512         struct igb_queue        *que;
1513         struct tx_ring          *txr;
1514         u32                     reg_icr, rx_done = 0;
1515         u32                     loop = IGB_MAX_LOOP;
1516         bool                    more;
1517
1518         IGB_CORE_LOCK(adapter);
1519         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1520                 IGB_CORE_UNLOCK(adapter);
1521                 return POLL_RETURN_COUNT(rx_done);
1522         }
1523
1524         if (cmd == POLL_AND_CHECK_STATUS) {
1525                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1526                 /* Link status change */
1527                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1528                         igb_handle_link_locked(adapter);
1529
1530                 if (reg_icr & E1000_ICR_RXO)
1531                         adapter->rx_overruns++;
1532         }
1533         IGB_CORE_UNLOCK(adapter);
1534
1535         for (int i = 0; i < adapter->num_queues; i++) {
1536                 que = &adapter->queues[i];
1537                 txr = que->txr;
1538
1539                 igb_rxeof(que, count, &rx_done);
1540
1541                 IGB_TX_LOCK(txr);
1542                 do {
1543                         more = igb_txeof(txr);
1544                 } while (loop-- && more);
1545 #ifndef IGB_LEGACY_TX
1546                 if (!drbr_empty(ifp, txr->br))
1547                         igb_mq_start_locked(ifp, txr);
1548 #else
1549                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1550                         igb_start_locked(txr, ifp);
1551 #endif
1552                 IGB_TX_UNLOCK(txr);
1553         }
1554
1555         return POLL_RETURN_COUNT(rx_done);
1556 }
1557 #endif /* DEVICE_POLLING */
1558
1559 /*********************************************************************
1560  *
1561  *  MSIX Que Interrupt Service routine
1562  *
1563  **********************************************************************/
1564 static void
1565 igb_msix_que(void *arg)
1566 {
1567         struct igb_queue *que = arg;
1568         struct adapter *adapter = que->adapter;
1569         struct ifnet   *ifp = adapter->ifp;
1570         struct tx_ring *txr = que->txr;
1571         struct rx_ring *rxr = que->rxr;
1572         u32             newitr = 0;
1573         bool            more_rx;
1574
1575         /* Ignore spurious interrupts */
1576         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1577                 return;
1578
1579         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1580         ++que->irqs;
1581
1582         IGB_TX_LOCK(txr);
1583         igb_txeof(txr);
1584 #ifndef IGB_LEGACY_TX
1585         /* Process the stack queue only if not depleted */
1586         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1587             !drbr_empty(ifp, txr->br))
1588                 igb_mq_start_locked(ifp, txr);
1589 #else
1590         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1591                 igb_start_locked(txr, ifp);
1592 #endif
1593         IGB_TX_UNLOCK(txr);
1594
1595         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1596
1597         if (adapter->enable_aim == FALSE)
1598                 goto no_calc;
1599         /*
1600         ** Do Adaptive Interrupt Moderation:
1601         **  - Write out last calculated setting
1602         **  - Calculate based on average size over
1603         **    the last interval.
1604         */
1605         if (que->eitr_setting)
1606                 E1000_WRITE_REG(&adapter->hw,
1607                     E1000_EITR(que->msix), que->eitr_setting);
1608  
1609         que->eitr_setting = 0;
1610
1611         /* Idle, do nothing */
1612         if ((txr->bytes == 0) && (rxr->bytes == 0))
1613                 goto no_calc;
1614                                 
1615         /* Use half the default if sub-gigabit */
1616         if (adapter->link_speed != 1000)
1617                 newitr = IGB_DEFAULT_ITR / 2;
1618         else {
1619                 if ((txr->bytes) && (txr->packets))
1620                         newitr = txr->bytes/txr->packets;
1621                 if ((rxr->bytes) && (rxr->packets))
1622                         newitr = max(newitr,
1623                             (rxr->bytes / rxr->packets));
1624                 newitr += 24; /* account for hardware frame, crc */
1625                 /* set an upper boundary */
1626                 newitr = min(newitr, 3000);
1627                 /* Be nice to the mid range */
1628                 if ((newitr > 300) && (newitr < 1200))
1629                         newitr = (newitr / 3);
1630                 else
1631                         newitr = (newitr / 2);
1632         }
1633         newitr &= 0x7FFC;  /* Mask invalid bits */
1634         if (adapter->hw.mac.type == e1000_82575)
1635                 newitr |= newitr << 16;
1636         else
1637                 newitr |= E1000_EITR_CNT_IGNR;
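        /*
        ** Rough example of the calculation above: an average frame of
        ** 1024 bytes at 1Gb gives newitr = 1024 + 24 = 1048, which is
        ** in the mid range and scaled to 1048 / 3 = 349, then masked
        ** with 0x7FFC to 348 before the type-specific bits are OR'd in.
        */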
1638                  
1639         /* save for next interrupt */
1640         que->eitr_setting = newitr;
1641
1642         /* Reset state */
1643         txr->bytes = 0;
1644         txr->packets = 0;
1645         rxr->bytes = 0;
1646         rxr->packets = 0;
1647
1648 no_calc:
1649         /* Schedule a clean task if needed */
1650         if (more_rx)
1651                 taskqueue_enqueue(que->tq, &que->que_task);
1652         else
1653                 /* Reenable this interrupt */
1654                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1655         return;
1656 }
1657
1658
1659 /*********************************************************************
1660  *
1661  *  MSIX Link Interrupt Service routine
1662  *
1663  **********************************************************************/
1664
1665 static void
1666 igb_msix_link(void *arg)
1667 {
1668         struct adapter  *adapter = arg;
1669         u32             icr;
1670
1671         ++adapter->link_irq;
1672         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1673         if (!(icr & E1000_ICR_LSC))
1674                 goto spurious;
1675         igb_handle_link(adapter, 0);
1676
1677 spurious:
1678         /* Rearm */
1679         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1680         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1681         return;
1682 }
1683
1684
1685 /*********************************************************************
1686  *
1687  *  Media Ioctl callback
1688  *
1689  *  This routine is called whenever the user queries the status of
1690  *  the interface using ifconfig.
1691  *
1692  **********************************************************************/
1693 static void
1694 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1695 {
1696         struct adapter *adapter = ifp->if_softc;
1697
1698         INIT_DEBUGOUT("igb_media_status: begin");
1699
1700         IGB_CORE_LOCK(adapter);
1701         igb_update_link_status(adapter);
1702
1703         ifmr->ifm_status = IFM_AVALID;
1704         ifmr->ifm_active = IFM_ETHER;
1705
1706         if (!adapter->link_active) {
1707                 IGB_CORE_UNLOCK(adapter);
1708                 return;
1709         }
1710
1711         ifmr->ifm_status |= IFM_ACTIVE;
1712
1713         switch (adapter->link_speed) {
1714         case 10:
1715                 ifmr->ifm_active |= IFM_10_T;
1716                 break;
1717         case 100:
1718                 /*
1719                 ** Support for 100Mb SFP - these are Fiber 
1720                 ** but the media type appears as serdes
1721                 */
1722                 if (adapter->hw.phy.media_type ==
1723                     e1000_media_type_internal_serdes)
1724                         ifmr->ifm_active |= IFM_100_FX;
1725                 else
1726                         ifmr->ifm_active |= IFM_100_TX;
1727                 break;
1728         case 1000:
1729                 ifmr->ifm_active |= IFM_1000_T;
1730                 break;
1731         }
1732
1733         if (adapter->link_duplex == FULL_DUPLEX)
1734                 ifmr->ifm_active |= IFM_FDX;
1735         else
1736                 ifmr->ifm_active |= IFM_HDX;
1737
1738         IGB_CORE_UNLOCK(adapter);
1739 }
1740
1741 /*********************************************************************
1742  *
1743  *  Media Ioctl callback
1744  *
1745  *  This routine is called when the user changes speed/duplex using
1746  *  media/mediaopt options with ifconfig.
1747  *
1748  **********************************************************************/
1749 static int
1750 igb_media_change(struct ifnet *ifp)
1751 {
1752         struct adapter *adapter = ifp->if_softc;
1753         struct ifmedia  *ifm = &adapter->media;
1754
1755         INIT_DEBUGOUT("igb_media_change: begin");
1756
1757         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1758                 return (EINVAL);
1759
1760         IGB_CORE_LOCK(adapter);
1761         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1762         case IFM_AUTO:
1763                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1764                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1765                 break;
1766         case IFM_1000_LX:
1767         case IFM_1000_SX:
1768         case IFM_1000_T:
1769                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1770                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1771                 break;
1772         case IFM_100_TX:
1773                 adapter->hw.mac.autoneg = FALSE;
1774                 adapter->hw.phy.autoneg_advertised = 0;
1775                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1776                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1777                 else
1778                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1779                 break;
1780         case IFM_10_T:
1781                 adapter->hw.mac.autoneg = FALSE;
1782                 adapter->hw.phy.autoneg_advertised = 0;
1783                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1784                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1785                 else
1786                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1787                 break;
1788         default:
1789                 device_printf(adapter->dev, "Unsupported media type\n");
1790         }
1791
1792         igb_init_locked(adapter);
1793         IGB_CORE_UNLOCK(adapter);
1794
1795         return (0);
1796 }
1797
1798
1799 /*********************************************************************
1800  *
1801  *  This routine maps the mbufs to Advanced TX descriptors.
1802  *  
1803  **********************************************************************/
1804 static int
1805 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1806 {
1807         struct adapter          *adapter = txr->adapter;
1808         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1809         bus_dmamap_t            map;
1810         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1811         union e1000_adv_tx_desc *txd = NULL;
1812         struct mbuf             *m_head = *m_headp;
1813         struct ether_vlan_header *eh = NULL;
1814         struct ip               *ip = NULL;
1815         struct tcphdr           *th = NULL;
1816         u32                     hdrlen, cmd_type_len, olinfo_status = 0;
1817         int                     ehdrlen, poff;
1818         int                     nsegs, i, first, last = 0;
1819         int                     error, do_tso, remap = 1;
1820
1821         /* Set basic descriptor constants */
1822         cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1823         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1824         if (m_head->m_flags & M_VLANTAG)
1825                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1826
1827 retry:
1828         m_head = *m_headp;
1829         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830         hdrlen = ehdrlen = poff = 0;
1831
1832         /*
1833          * Intel recommends entire IP/TCP header length reside in a single
1834          * buffer. If multiple descriptors are used to describe the IP and
1835          * TCP header, each descriptor should describe one or more
1836          * complete headers; descriptors referencing only parts of headers
1837          * are not supported. If all layer headers are not coalesced into
1838          * a single buffer, each buffer should not cross a 4KB boundary,
1839          * or be larger than the maximum read request size.
1840          * The controller also requires modifying the IP/TCP header to
1841          * make TSO work, so we first get a writable mbuf chain and then
1842          * coalesce the ethernet/IP/TCP headers into a single buffer to
1843          * meet the controller's requirement. This also simplifies
1844          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1845          */
1846         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847                 if (do_tso || (m_head->m_next != NULL && 
1848                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849                         if (M_WRITABLE(*m_headp) == 0) {
1850                                 m_head = m_dup(*m_headp, M_NOWAIT);
1851                                 m_freem(*m_headp);
1852                                 if (m_head == NULL) {
1853                                         *m_headp = NULL;
1854                                         return (ENOBUFS);
1855                                 }
1856                                 *m_headp = m_head;
1857                         }
1858                 }
1859                 /*
1860                  * Assume IPv4; we don't have TSO/checksum offload support
1861                  * for IPv6 yet.
1862                  */
1863                 ehdrlen = sizeof(struct ether_header);
1864                 m_head = m_pullup(m_head, ehdrlen);
1865                 if (m_head == NULL) {
1866                         *m_headp = NULL;
1867                         return (ENOBUFS);
1868                 }
1869                 eh = mtod(m_head, struct ether_vlan_header *);
1870                 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1871                         ehdrlen = sizeof(struct ether_vlan_header);
1872                         m_head = m_pullup(m_head, ehdrlen);
1873                         if (m_head == NULL) {
1874                                 *m_headp = NULL;
1875                                 return (ENOBUFS);
1876                         }
1877                 }
1878                 m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1879                 if (m_head == NULL) {
1880                         *m_headp = NULL;
1881                         return (ENOBUFS);
1882                 }
1883                 ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1884                 poff = ehdrlen + (ip->ip_hl << 2);
1885                 if (do_tso) {
1886                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1887                         if (m_head == NULL) {
1888                                 *m_headp = NULL;
1889                                 return (ENOBUFS);
1890                         }
1891                         /*
1892                          * The pseudo TCP checksum does not include the TCP payload
1893                          * length, so the driver must recompute it here to produce
1894                          * the value the hardware expects to see. This follows
1895                          * Microsoft's Large Send specification.
1896                          */
1897                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1898                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
1899                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1900                         /* Keep track of the full header length */
1901                         hdrlen = poff + (th->th_off << 2);
1902                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1903                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1904                         if (m_head == NULL) {
1905                                 *m_headp = NULL;
1906                                 return (ENOBUFS);
1907                         }
1908                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1909                         m_head = m_pullup(m_head, poff + (th->th_off << 2));
1910                         if (m_head == NULL) {
1911                                 *m_headp = NULL;
1912                                 return (ENOBUFS);
1913                         }
1914                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1915                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1916                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1917                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1918                         if (m_head == NULL) {
1919                                 *m_headp = NULL;
1920                                 return (ENOBUFS);
1921                         }
1922                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1923                 }
1924                 *m_headp = m_head;
1925         }
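        /*
        ** At this point, when TSO or checksum offload is requested, the
        ** ethernet/IP (and L4) headers sit contiguously in the first
        ** mbuf: poff is the L4 header offset, hdrlen the total header
        ** length (TSO only), and ip (and th for TCP/TSO) point into it.
        */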
1926
1927         /*
1928          * Map the packet for DMA
1929          *
1930          * Capture the first descriptor index;
1931          * this descriptor will hold the index
1932          * of the EOP, which is the only one that
1933          * now gets a DONE bit writeback.
1934          */
1935         first = txr->next_avail_desc;
1936         tx_buffer = &txr->tx_buffers[first];
1937         tx_buffer_mapped = tx_buffer;
1938         map = tx_buffer->map;
1939
1940         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1941             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1942
1943         /*
1944          * There are two types of errors we can (try) to handle:
1945          * - EFBIG means the mbuf chain was too long and bus_dma ran
1946          *   out of segments.  Defragment the mbuf chain and try again.
1947          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1948          *   at this point in time.  Defer sending and try again later.
1949          * All other errors, in particular EINVAL, are fatal and prevent the
1950          * mbuf chain from ever going through.  Drop it and report error.
1951          */
1952         if (error == EFBIG && remap) {
1953                 struct mbuf *m;
1954
1955                 m = m_defrag(*m_headp, M_NOWAIT);
1956                 if (m == NULL) {
1957                         adapter->mbuf_defrag_failed++;
1958                         m_freem(*m_headp);
1959                         *m_headp = NULL;
1960                         return (ENOBUFS);
1961                 }
1962                 *m_headp = m;
1963
1964                 /* Try it again, but only once */
1965                 remap = 0;
1966                 goto retry;
1967         } else if (error == ENOMEM) {
1968                 adapter->no_tx_dma_setup++;
1969                 return (error);
1970         } else if (error != 0) {
1971                 adapter->no_tx_dma_setup++;
1972                 m_freem(*m_headp);
1973                 *m_headp = NULL;
1974                 return (error);
1975         }
1976
1977         /*
1978         ** Make sure we don't overrun the ring:
1979         ** we need nsegs descriptors plus one for
1980         ** the context descriptor used for the
1981         ** offloads.
1982         */
1983         if ((nsegs + 1) > (txr->tx_avail - 2)) {
1984                 txr->no_desc_avail++;
1985                 bus_dmamap_unload(txr->txtag, map);
1986                 return (ENOBUFS);
1987         }
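        /*
        ** The two-descriptor slack in the check above presumably keeps
        ** the ring from ever being driven completely full, so head and
        ** tail never alias while frames are still outstanding.
        */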
1988         m_head = *m_headp;
1989
1990         /* Do hardware assists:
1991          * Set up the context descriptor, used
1992          * when any hardware offload is done.
1993          * This includes CSUM, VLAN, and TSO.
1994          * It will use the first descriptor.
1995          */
1996
1997         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1998                 if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1999                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2000                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2001                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2002                 } else
2003                         return (ENXIO);
2004         } else if (igb_tx_ctx_setup(txr, m_head))
2005                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2006
2007         /* Calculate payload length */
2008         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2009             << E1000_ADVTXD_PAYLEN_SHIFT);
2010
2011         /* 82575 needs the queue index added */
2012         if (adapter->hw.mac.type == e1000_82575)
2013                 olinfo_status |= txr->me << 4;
2014
2015         /* Set up our transmit descriptors */
2016         i = txr->next_avail_desc;
2017         for (int j = 0; j < nsegs; j++) {
2018                 bus_size_t seg_len;
2019                 bus_addr_t seg_addr;
2020
2021                 tx_buffer = &txr->tx_buffers[i];
2022                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2023                 seg_addr = segs[j].ds_addr;
2024                 seg_len  = segs[j].ds_len;
2025
2026                 txd->read.buffer_addr = htole64(seg_addr);
2027                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2028                 txd->read.olinfo_status = htole32(olinfo_status);
2029                 last = i;
2030                 if (++i == adapter->num_tx_desc)
2031                         i = 0;
2032                 tx_buffer->m_head = NULL;
2033                 tx_buffer->next_eop = -1;
2034         }
2035
2036         txr->next_avail_desc = i;
2037         txr->tx_avail -= nsegs;
2038         tx_buffer->m_head = m_head;
2039
2040         /*
2041         ** Here we swap the map so the last descriptor,
2042         ** which gets the completion interrupt, has the
2043         ** real map, and the first descriptor gets the
2044         ** unused map from this descriptor.
2045         */
2046         tx_buffer_mapped->map = tx_buffer->map;
2047         tx_buffer->map = map;
2048         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2049
2050         /*
2051          * Last Descriptor of Packet
2052          * needs End Of Packet (EOP)
2053          * and Report Status (RS)
2054          */
2055         txd->read.cmd_type_len |=
2056             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2057         /*
2058          * Keep track in the first buffer which
2059          * descriptor will be written back
2060          */
2061         tx_buffer = &txr->tx_buffers[first];
2062         tx_buffer->next_eop = last;
2063         /* Update the watchdog time early and often */
2064         txr->watchdog_time = ticks;
2065
2066         /*
2067          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2068          * that this frame is available to transmit.
2069          */
2070         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2071             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2072         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2073         ++txr->tx_packets;
2074
2075         return (0);
2076 }
2077 static void
2078 igb_set_promisc(struct adapter *adapter)
2079 {
2080         struct ifnet    *ifp = adapter->ifp;
2081         struct e1000_hw *hw = &adapter->hw;
2082         u32             reg;
2083
2084         if (adapter->vf_ifp) {
2085                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2086                 return;
2087         }
2088
2089         reg = E1000_READ_REG(hw, E1000_RCTL);
2090         if (ifp->if_flags & IFF_PROMISC) {
2091                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2092                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2093         } else if (ifp->if_flags & IFF_ALLMULTI) {
2094                 reg |= E1000_RCTL_MPE;
2095                 reg &= ~E1000_RCTL_UPE;
2096                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2097         }
2098 }
2099
2100 static void
2101 igb_disable_promisc(struct adapter *adapter)
2102 {
2103         struct e1000_hw *hw = &adapter->hw;
2104         struct ifnet    *ifp = adapter->ifp;
2105         u32             reg;
2106         int             mcnt = 0;
2107
2108         if (adapter->vf_ifp) {
2109                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2110                 return;
2111         }
2112         reg = E1000_READ_REG(hw, E1000_RCTL);
2113         reg &=  (~E1000_RCTL_UPE);
2114         if (ifp->if_flags & IFF_ALLMULTI)
2115                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2116         else {
2117                 struct  ifmultiaddr *ifma;
2118 #if __FreeBSD_version < 800000
2119                 IF_ADDR_LOCK(ifp);
2120 #else   
2121                 if_maddr_rlock(ifp);
2122 #endif
2123                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2124                         if (ifma->ifma_addr->sa_family != AF_LINK)
2125                                 continue;
2126                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2127                                 break;
2128                         mcnt++;
2129                 }
2130 #if __FreeBSD_version < 800000
2131                 IF_ADDR_UNLOCK(ifp);
2132 #else
2133                 if_maddr_runlock(ifp);
2134 #endif
2135         }
2136         /* Don't disable if in MAX groups */
2137         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2138                 reg &=  (~E1000_RCTL_MPE);
2139         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2140 }
2141
2142
2143 /*********************************************************************
2144  *  Multicast Update
2145  *
2146  *  This routine is called whenever multicast address list is updated.
2147  *
2148  **********************************************************************/
2149
2150 static void
2151 igb_set_multi(struct adapter *adapter)
2152 {
2153         struct ifnet    *ifp = adapter->ifp;
2154         struct ifmultiaddr *ifma;
2155         u32 reg_rctl = 0;
2156         u8  *mta;
2157
2158         int mcnt = 0;
2159
2160         IOCTL_DEBUGOUT("igb_set_multi: begin");
2161
2162         mta = adapter->mta;
2163         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2164             MAX_NUM_MULTICAST_ADDRESSES);
2165
2166 #if __FreeBSD_version < 800000
2167         IF_ADDR_LOCK(ifp);
2168 #else
2169         if_maddr_rlock(ifp);
2170 #endif
2171         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2172                 if (ifma->ifma_addr->sa_family != AF_LINK)
2173                         continue;
2174
2175                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2176                         break;
2177
2178                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2179                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2180                 mcnt++;
2181         }
2182 #if __FreeBSD_version < 800000
2183         IF_ADDR_UNLOCK(ifp);
2184 #else
2185         if_maddr_runlock(ifp);
2186 #endif
2187
2188         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2189                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2190                 reg_rctl |= E1000_RCTL_MPE;
2191                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2192         } else
2193                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2194 }
2195
2196
2197 /*********************************************************************
2198  *  Timer routine:
2199  *      This routine checks for link status,
2200  *      updates statistics, and does the watchdog.
2201  *
2202  **********************************************************************/
2203
2204 static void
2205 igb_local_timer(void *arg)
2206 {
2207         struct adapter          *adapter = arg;
2208         device_t                dev = adapter->dev;
2209         struct ifnet            *ifp = adapter->ifp;
2210         struct tx_ring          *txr = adapter->tx_rings;
2211         struct igb_queue        *que = adapter->queues;
2212         int                     hung = 0, busy = 0;
2213
2214
2215         IGB_CORE_LOCK_ASSERT(adapter);
2216
2217         igb_update_link_status(adapter);
2218         igb_update_stats_counters(adapter);
2219
2220         /*
2221         ** Check the TX queues status
2222         **      - central locked handling of OACTIVE
2223         **      - watchdog only if all queues show hung
2224         */
2225         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2226                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2227                     (adapter->pause_frames == 0))
2228                         ++hung;
2229                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2230                         ++busy;
2231                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2232                         taskqueue_enqueue(que->tq, &que->que_task);
2233         }
2234         if (hung == adapter->num_queues)
2235                 goto timeout;
2236         if (busy == adapter->num_queues)
2237                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2238         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2239             (busy < adapter->num_queues))
2240                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2241
2242         adapter->pause_frames = 0;
2243         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2244 #ifndef DEVICE_POLLING
2245         /* Schedule all queue interrupts - deadlock protection */
2246         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2247 #endif
2248         return;
2249
2250 timeout:
2251         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2252         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2253             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2254             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2255         device_printf(dev, "TX(%d) desc avail = %d, "
2256             "Next TX to Clean = %d\n",
2257             txr->me, txr->tx_avail, txr->next_to_clean);
2258         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2259         adapter->watchdog_events++;
2260         igb_init_locked(adapter);
2261 }
2262
2263 static void
2264 igb_update_link_status(struct adapter *adapter)
2265 {
2266         struct e1000_hw         *hw = &adapter->hw;
2267         struct e1000_fc_info    *fc = &hw->fc;
2268         struct ifnet            *ifp = adapter->ifp;
2269         device_t                dev = adapter->dev;
2270         struct tx_ring          *txr = adapter->tx_rings;
2271         u32                     link_check, thstat, ctrl;
2272         char                    *flowctl = NULL;
2273
2274         link_check = thstat = ctrl = 0;
2275
2276         /* Get the cached link value or read for real */
2277         switch (hw->phy.media_type) {
2278         case e1000_media_type_copper:
2279                 if (hw->mac.get_link_status) {
2280                         /* Do the work to read phy */
2281                         e1000_check_for_link(hw);
2282                         link_check = !hw->mac.get_link_status;
2283                 } else
2284                         link_check = TRUE;
2285                 break;
2286         case e1000_media_type_fiber:
2287                 e1000_check_for_link(hw);
2288                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2289                                  E1000_STATUS_LU);
2290                 break;
2291         case e1000_media_type_internal_serdes:
2292                 e1000_check_for_link(hw);
2293                 link_check = adapter->hw.mac.serdes_has_link;
2294                 break;
2295         /* VF device is type_unknown */
2296         case e1000_media_type_unknown:
2297                 e1000_check_for_link(hw);
2298                 link_check = !hw->mac.get_link_status;
2299                 /* Fall thru */
2300         default:
2301                 break;
2302         }
2303
2304         /* Check for thermal downshift or shutdown */
2305         if (hw->mac.type == e1000_i350) {
2306                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2307                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2308         }
2309
2310         /* Get the flow control for display */
2311         switch (fc->current_mode) {
2312         case e1000_fc_rx_pause:
2313                 flowctl = "RX";
2314                 break;  
2315         case e1000_fc_tx_pause:
2316                 flowctl = "TX";
2317                 break;  
2318         case e1000_fc_full:
2319                 flowctl = "Full";
2320                 break;  
2321         case e1000_fc_none:
2322         default:
2323                 flowctl = "None";
2324                 break;  
2325         }
2326
2327         /* Now we check if a transition has happened */
2328         if (link_check && (adapter->link_active == 0)) {
2329                 e1000_get_speed_and_duplex(&adapter->hw, 
2330                     &adapter->link_speed, &adapter->link_duplex);
2331                 if (bootverbose)
2332                         device_printf(dev, "Link is up %d Mbps %s,"
2333                             " Flow Control: %s\n",
2334                             adapter->link_speed,
2335                             ((adapter->link_duplex == FULL_DUPLEX) ?
2336                             "Full Duplex" : "Half Duplex"), flowctl);
2337                 adapter->link_active = 1;
2338                 ifp->if_baudrate = adapter->link_speed * 1000000;
2339                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2340                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2341                         device_printf(dev, "Link: thermal downshift\n");
2342                 /* This can sleep */
2343                 if_link_state_change(ifp, LINK_STATE_UP);
2344         } else if (!link_check && (adapter->link_active == 1)) {
2345                 ifp->if_baudrate = adapter->link_speed = 0;
2346                 adapter->link_duplex = 0;
2347                 if (bootverbose)
2348                         device_printf(dev, "Link is Down\n");
2349                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2350                     (thstat & E1000_THSTAT_PWR_DOWN))
2351                         device_printf(dev, "Link: thermal shutdown\n");
2352                 adapter->link_active = 0;
2353                 /* This can sleep */
2354                 if_link_state_change(ifp, LINK_STATE_DOWN);
2355                 /* Reset queue state */
2356                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2357                         txr->queue_status = IGB_QUEUE_IDLE;
2358         }
2359 }
2360
2361 /*********************************************************************
2362  *
2363  *  This routine disables all traffic on the adapter by issuing a
2364  *  global reset on the MAC and deallocates TX/RX buffers.
2365  *
2366  **********************************************************************/
2367
2368 static void
2369 igb_stop(void *arg)
2370 {
2371         struct adapter  *adapter = arg;
2372         struct ifnet    *ifp = adapter->ifp;
2373         struct tx_ring *txr = adapter->tx_rings;
2374
2375         IGB_CORE_LOCK_ASSERT(adapter);
2376
2377         INIT_DEBUGOUT("igb_stop: begin");
2378
2379         igb_disable_intr(adapter);
2380
2381         callout_stop(&adapter->timer);
2382
2383         /* Tell the stack that the interface is no longer active */
2384         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2385         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2386
2387         /* Disarm watchdog timer. */
2388         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2389                 IGB_TX_LOCK(txr);
2390                 txr->queue_status = IGB_QUEUE_IDLE;
2391                 IGB_TX_UNLOCK(txr);
2392         }
2393
2394         e1000_reset_hw(&adapter->hw);
2395         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2396
2397         e1000_led_off(&adapter->hw);
2398         e1000_cleanup_led(&adapter->hw);
2399 }
2400
2401
2402 /*********************************************************************
2403  *
2404  *  Determine hardware revision.
2405  *
2406  **********************************************************************/
2407 static void
2408 igb_identify_hardware(struct adapter *adapter)
2409 {
2410         device_t dev = adapter->dev;
2411
2412         /* Make sure bus mastering is enabled in our PCI config space */
2413         pci_enable_busmaster(dev);
2414         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2415
2416         /* Save off the information about this board */
2417         adapter->hw.vendor_id = pci_get_vendor(dev);
2418         adapter->hw.device_id = pci_get_device(dev);
2419         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2420         adapter->hw.subsystem_vendor_id =
2421             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2422         adapter->hw.subsystem_device_id =
2423             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2424
2425         /* Set MAC type early for PCI setup */
2426         e1000_set_mac_type(&adapter->hw);
2427
2428         /* Are we a VF device? */
2429         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2430             (adapter->hw.mac.type == e1000_vfadapt_i350))
2431                 adapter->vf_ifp = 1;
2432         else
2433                 adapter->vf_ifp = 0;
2434 }
2435
2436 static int
2437 igb_allocate_pci_resources(struct adapter *adapter)
2438 {
2439         device_t        dev = adapter->dev;
2440         int             rid;
2441
2442         rid = PCIR_BAR(0);
2443         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2444             &rid, RF_ACTIVE);
2445         if (adapter->pci_mem == NULL) {
2446                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2447                 return (ENXIO);
2448         }
2449         adapter->osdep.mem_bus_space_tag =
2450             rman_get_bustag(adapter->pci_mem);
2451         adapter->osdep.mem_bus_space_handle =
2452             rman_get_bushandle(adapter->pci_mem);
2453         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
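        /*
        ** Register access macros go through osdep's bus_space tag and
        ** handle (via hw.back below); hw_addr is seemingly set only so
        ** the shared code sees a non-NULL mapping.
        */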
2454
2455         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2456
2457         /* This will setup either MSI/X or MSI */
2458         adapter->msix = igb_setup_msix(adapter);
2459         adapter->hw.back = &adapter->osdep;
2460
2461         return (0);
2462 }
2463
2464 /*********************************************************************
2465  *
2466  *  Setup the Legacy or MSI Interrupt handler
2467  *
2468  **********************************************************************/
2469 static int
2470 igb_allocate_legacy(struct adapter *adapter)
2471 {
2472         device_t                dev = adapter->dev;
2473         struct igb_queue        *que = adapter->queues;
2474         int                     error, rid = 0;
2475
2476         /* Turn off all interrupts */
2477         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2478
2479         /* MSI RID is 1 */
2480         if (adapter->msix == 1)
2481                 rid = 1;
2482
2483         /* We allocate a single interrupt resource */
2484         adapter->res = bus_alloc_resource_any(dev,
2485             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2486         if (adapter->res == NULL) {
2487                 device_printf(dev, "Unable to allocate bus resource: "
2488                     "interrupt\n");
2489                 return (ENXIO);
2490         }
2491
2492 #ifndef IGB_LEGACY_TX
2493         TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr);
2494 #endif
2495
2496         /*
2497          * Try allocating a fast interrupt and the associated deferred
2498          * processing contexts.
2499          */
2500         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2501         /* Make tasklet for deferred link handling */
2502         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2503         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2504             taskqueue_thread_enqueue, &que->tq);
2505         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2506             device_get_nameunit(adapter->dev));
2507         if ((error = bus_setup_intr(dev, adapter->res,
2508             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2509             adapter, &adapter->tag)) != 0) {
2510                 device_printf(dev, "Failed to register fast interrupt "
2511                             "handler: %d\n", error);
2512                 taskqueue_free(que->tq);
2513                 que->tq = NULL;
2514                 return (error);
2515         }
2516
2517         return (0);
2518 }
2519
2520
2521 /*********************************************************************
2522  *
2523  *  Setup the MSIX Queue Interrupt handlers: 
2524  *
2525  **********************************************************************/
2526 static int
2527 igb_allocate_msix(struct adapter *adapter)
2528 {
2529         device_t                dev = adapter->dev;
2530         struct igb_queue        *que = adapter->queues;
2531         int                     error, rid, vector = 0;
2532
2533         /* Be sure to start with all interrupts disabled */
2534         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2535         E1000_WRITE_FLUSH(&adapter->hw);
2536
2537         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2538                 rid = vector + 1;
2539                 que->res = bus_alloc_resource_any(dev,
2540                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2541                 if (que->res == NULL) {
2542                         device_printf(dev,
2543                             "Unable to allocate bus resource: "
2544                             "MSIX Queue Interrupt\n");
2545                         return (ENXIO);
2546                 }
2547                 error = bus_setup_intr(dev, que->res,
2548                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2549                     igb_msix_que, que, &que->tag);
2550                 if (error) {
2551                         que->res = NULL;
2552                         device_printf(dev, "Failed to register Queue handler");
2553                         return (error);
2554                 }
2555 #if __FreeBSD_version >= 800504
2556                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2557 #endif
2558                 que->msix = vector;
2559                 if (adapter->hw.mac.type == e1000_82575)
2560                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2561                 else
2562                         que->eims = 1 << vector;
2563                 /*
2564                 ** Bind the msix vector, and thus the
2565                 ** rings to the corresponding cpu.
2566                 */
2567                 if (adapter->num_queues > 1) {
2568                         if (igb_last_bind_cpu < 0)
2569                                 igb_last_bind_cpu = CPU_FIRST();
2570                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2571                         device_printf(dev,
2572                                 "Bound queue %d to cpu %d\n",
2573                                 i, igb_last_bind_cpu);
2574                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2575                 }
2576 #ifndef IGB_LEGACY_TX
2577                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2578                     que->txr);
2579 #endif
2580                 /* Make tasklet for deferred handling */
2581                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2582                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2583                     taskqueue_thread_enqueue, &que->tq);
2584                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2585                     device_get_nameunit(adapter->dev));
2586         }
2587
2588         /* And Link */
2589         rid = vector + 1;
2590         adapter->res = bus_alloc_resource_any(dev,
2591             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2592         if (adapter->res == NULL) {
2593                 device_printf(dev,
2594                     "Unable to allocate bus resource: "
2595                     "MSIX Link Interrupt\n");
2596                 return (ENXIO);
2597         }
2598         if ((error = bus_setup_intr(dev, adapter->res,
2599             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2600             igb_msix_link, adapter, &adapter->tag)) != 0) {
2601                 device_printf(dev, "Failed to register Link handler");
2602                 return (error);
2603         }
2604 #if __FreeBSD_version >= 800504
2605         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2606 #endif
2607         adapter->linkvec = vector;
2608
2609         return (0);
2610 }
2611
2612
2613 static void
2614 igb_configure_queues(struct adapter *adapter)
2615 {
2616         struct  e1000_hw        *hw = &adapter->hw;
2617         struct  igb_queue       *que;
2618         u32                     tmp, ivar = 0, newitr = 0;
2619
2620         /* First turn on RSS capability */
2621         if (adapter->hw.mac.type != e1000_82575)
2622                 E1000_WRITE_REG(hw, E1000_GPIE,
2623                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2624                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
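        /*
        ** The IVAR registers programmed below map each RX/TX queue to
        ** an MSI-X vector. On the 82580/i350 family each IVAR entry
        ** covers two queues (RX in bytes 0/2, TX in bytes 1/3); on the
        ** 82576 the first eight queues use the low RX/TX bytes and any
        ** further queues use the high ones.
        */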
2625
2626         /* Turn on MSIX */
2627         switch (adapter->hw.mac.type) {
2628         case e1000_82580:
2629         case e1000_i350:
2630         case e1000_i210:
2631         case e1000_i211:
2632         case e1000_vfadapt:
2633         case e1000_vfadapt_i350:
2634                 /* RX entries */
2635                 for (int i = 0; i < adapter->num_queues; i++) {
2636                         u32 index = i >> 1;
2637                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2638                         que = &adapter->queues[i];
2639                         if (i & 1) {
2640                                 ivar &= 0xFF00FFFF;
2641                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2642                         } else {
2643                                 ivar &= 0xFFFFFF00;
2644                                 ivar |= que->msix | E1000_IVAR_VALID;
2645                         }
2646                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2647                 }
2648                 /* TX entries */
2649                 for (int i = 0; i < adapter->num_queues; i++) {
2650                         u32 index = i >> 1;
2651                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2652                         que = &adapter->queues[i];
2653                         if (i & 1) {
2654                                 ivar &= 0x00FFFFFF;
2655                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2656                         } else {
2657                                 ivar &= 0xFFFF00FF;
2658                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2659                         }
2660                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2661                         adapter->que_mask |= que->eims;
2662                 }
2663
2664                 /* And for the link interrupt */
2665                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2666                 adapter->link_mask = 1 << adapter->linkvec;
2667                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2668                 break;
2669         case e1000_82576:
2670                 /* RX entries */
2671                 for (int i = 0; i < adapter->num_queues; i++) {
2672                         u32 index = i & 0x7; /* Each IVAR has two entries */
2673                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2674                         que = &adapter->queues[i];
2675                         if (i < 8) {
2676                                 ivar &= 0xFFFFFF00;
2677                                 ivar |= que->msix | E1000_IVAR_VALID;
2678                         } else {
2679                                 ivar &= 0xFF00FFFF;
2680                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2681                         }
2682                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2683                         adapter->que_mask |= que->eims;
2684                 }
2685                 /* TX entries */
2686                 for (int i = 0; i < adapter->num_queues; i++) {
2687                         u32 index = i & 0x7; /* Each IVAR has two entries */
2688                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2689                         que = &adapter->queues[i];
2690                         if (i < 8) {
2691                                 ivar &= 0xFFFF00FF;
2692                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2693                         } else {
2694                                 ivar &= 0x00FFFFFF;
2695                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2696                         }
2697                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2698                         adapter->que_mask |= que->eims;
2699                 }
2700
2701                 /* And for the link interrupt */
2702                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2703                 adapter->link_mask = 1 << adapter->linkvec;
2704                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2705                 break;
2706
2707         case e1000_82575:
2708                 /* enable MSI-X support*/
2709                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2710                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2711                 /* Auto-Mask interrupts upon ICR read. */
2712                 tmp |= E1000_CTRL_EXT_EIAME;
2713                 tmp |= E1000_CTRL_EXT_IRCA;
2714                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2715
2716                 /* Queues */
2717                 for (int i = 0; i < adapter->num_queues; i++) {
2718                         que = &adapter->queues[i];
2719                         tmp = E1000_EICR_RX_QUEUE0 << i;
2720                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2721                         que->eims = tmp;
2722                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2723                             i, que->eims);
2724                         adapter->que_mask |= que->eims;
2725                 }
2726
2727                 /* Link */
2728                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2729                     E1000_EIMS_OTHER);
2730                 adapter->link_mask |= E1000_EIMS_OTHER;
2731         default:
2732                 break;
2733         }
2734
2735         /* Set the starting interrupt rate */
2736         if (igb_max_interrupt_rate > 0)
2737                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2738
2739         if (hw->mac.type == e1000_82575)
2740                 newitr |= newitr << 16;
2741         else
2742                 newitr |= E1000_EITR_CNT_IGNR;
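        /*
        ** For example, a cap of 8000 interrupts/sec yields
        ** newitr = 4000000 / 8000 = 500, which is then written to
        ** every queue's EITR register below.
        */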
2743
2744         for (int i = 0; i < adapter->num_queues; i++) {
2745                 que = &adapter->queues[i];
2746                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2747         }
2748
2749         return;
2750 }
2751
2752
2753 static void
2754 igb_free_pci_resources(struct adapter *adapter)
2755 {
2756         struct          igb_queue *que = adapter->queues;
2757         device_t        dev = adapter->dev;
2758         int             rid;
2759
2760         /*
2761         ** There is a slight possibility of a failure mode
2762         ** in attach that will result in entering this function
2763         ** before interrupt resources have been initialized, and
2764         ** in that case we do not want to execute the loops below.
2765         ** We can detect this reliably by the state of the adapter
2766         ** res pointer.
2767         */
2768         if (adapter->res == NULL)
2769                 goto mem;
2770
2771         /*
2772          * First release all the interrupt resources:
2773          */
2774         for (int i = 0; i < adapter->num_queues; i++, que++) {
2775                 rid = que->msix + 1;
2776                 if (que->tag != NULL) {
2777                         bus_teardown_intr(dev, que->res, que->tag);
2778                         que->tag = NULL;
2779                 }
2780                 if (que->res != NULL)
2781                         bus_release_resource(dev,
2782                             SYS_RES_IRQ, rid, que->res);
2783         }
2784
2785         /* Clean the Legacy or Link interrupt last */
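             /* rid 0 is the legacy INTx resource; MSI and MSI-X vectors use rids 1 and up */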
2786         if (adapter->linkvec) /* we are doing MSIX */
2787                 rid = adapter->linkvec + 1;
2788         else
2789                 rid = (adapter->msix != 0) ? 1 : 0;
2790
2791         que = adapter->queues;
2792         if (adapter->tag != NULL) {
2793                 taskqueue_drain(que->tq, &adapter->link_task);
2794                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2795                 adapter->tag = NULL;
2796         }
2797         if (adapter->res != NULL)
2798                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2799
2800         for (int i = 0; i < adapter->num_queues; i++, que++) {
2801                 if (que->tq != NULL) {
2802 #ifndef IGB_LEGACY_TX
2803                         taskqueue_drain(que->tq, &que->txr->txq_task);
2804 #endif
2805                         taskqueue_drain(que->tq, &que->que_task);
2806                         taskqueue_free(que->tq);
2807                 }
2808         }
2809 mem:
2810         if (adapter->msix)
2811                 pci_release_msi(dev);
2812
2813         if (adapter->msix_mem != NULL)
2814                 bus_release_resource(dev, SYS_RES_MEMORY,
2815                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2816
2817         if (adapter->pci_mem != NULL)
2818                 bus_release_resource(dev, SYS_RES_MEMORY,
2819                     PCIR_BAR(0), adapter->pci_mem);
2820
2821 }
2822
2823 /*
2824  * Setup either MSI-X or MSI
2825  */
2826 static int
2827 igb_setup_msix(struct adapter *adapter)
2828 {
2829         device_t dev = adapter->dev;
2830         int rid, want, queues, msgs, maxqueues;
2831
2832         /* tuneable override */
2833         if (igb_enable_msix == 0)
2834                 goto msi;
2835
2836         /* First try MSI/X */
2837         rid = PCIR_BAR(IGB_MSIX_BAR);
2838         adapter->msix_mem = bus_alloc_resource_any(dev,
2839             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2840         if (!adapter->msix_mem) {
2841                 /* May not be enabled */
2842                 device_printf(adapter->dev,
2843                     "Unable to map MSIX table\n");
2844                 goto msi;
2845         }
2846
2847         msgs = pci_msix_count(dev); 
2848         if (msgs == 0) { /* system has msix disabled */
2849                 bus_release_resource(dev, SYS_RES_MEMORY,
2850                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2851                 adapter->msix_mem = NULL;
2852                 goto msi;
2853         }
2854
2855         /* Figure out a reasonable auto config value */
2856         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2857
2858         /* Manual override */
2859         if (igb_num_queues != 0)
2860                 queues = igb_num_queues;
2861
2862         /* Sanity check based on HW */
2863         switch (adapter->hw.mac.type) {
2864                 case e1000_82575:
2865                         maxqueues = 4;
2866                         break;
2867                 case e1000_82576:
2868                 case e1000_82580:
2869                 case e1000_i350:
2870                         maxqueues = 8;
2871                         break;
2872                 case e1000_i210:
2873                         maxqueues = 4;
2874                         break;
2875                 case e1000_i211:
2876                         maxqueues = 2;
2877                         break;
2878                 default:  /* VF interfaces */
2879                         maxqueues = 1;
2880                         break;
2881         }
2882         if (queues > maxqueues)
2883                 queues = maxqueues;
2884
2885         /*
2886         ** One vector (RX/TX pair) per queue
2887         ** plus an additional one for the link interrupt
2888         */
2889         want = queues + 1;
2890         if (msgs >= want)
2891                 msgs = want;
2892         else {
2893                 device_printf(adapter->dev,
2894                     "MSIX Configuration Problem, "
2895                     "%d vectors configured, but %d vectors (queues + link) wanted!\n",
2896                     msgs, want);
2897                 return (0);
2898         }
2899         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2900                 device_printf(adapter->dev,
2901                     "Using MSIX interrupts with %d vectors\n", msgs);
2902                 adapter->num_queues = queues;
2903                 return (msgs);
2904         }
2905 msi:
2906         msgs = pci_msi_count(dev);
2907         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2908                 device_printf(adapter->dev, "Using MSI interrupt\n");
2909                 return (msgs);
2910         }
2911         return (0);
2912 }
2913
2914 /*********************************************************************
2915  *
2916  *  Set up a fresh starting state
2917  *
2918  **********************************************************************/
2919 static void
2920 igb_reset(struct adapter *adapter)
2921 {
2922         device_t        dev = adapter->dev;
2923         struct e1000_hw *hw = &adapter->hw;
2924         struct e1000_fc_info *fc = &hw->fc;
2925         struct ifnet    *ifp = adapter->ifp;
2926         u32             pba = 0;
2927         u16             hwm;
2928
2929         INIT_DEBUGOUT("igb_reset: begin");
2930
2931         /* Let the firmware know the OS is in control */
2932         igb_get_hw_control(adapter);
2933
2934         /*
2935          * Packet Buffer Allocation (PBA)
2936          * Writing PBA sets the receive portion of the buffer;
2937          * the remainder is used for the transmit buffer.
2938          */
2939         switch (hw->mac.type) {
2940         case e1000_82575:
2941                 pba = E1000_PBA_32K;
2942                 break;
2943         case e1000_82576:
2944         case e1000_vfadapt:
2945                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2946                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2947                 break;
2948         case e1000_82580:
2949         case e1000_i350:
2950         case e1000_vfadapt_i350:
2951                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2952                 pba = e1000_rxpbs_adjust_82580(pba);
2953                 break;
2954         case e1000_i210:
2955         case e1000_i211:
2956                 pba = E1000_PBA_34K;
2957         default:
2958                 break;
2959         }
2960
2961         /* Special needs in case of Jumbo frames */
2962         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2963                 u32 tx_space, min_tx, min_rx;
2964                 pba = E1000_READ_REG(hw, E1000_PBA);
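                     /* PBA: TX allocation in the high 16 bits, RX in the low 16, both in KB */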
2965                 tx_space = pba >> 16;
2966                 pba &= 0xffff;
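                     /*
                      * The minimums below (two max frames worth of TX space,
                      * one frame of RX) are rounded up and converted to KB
                      * to match the PBA units.
                      */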
2967                 min_tx = (adapter->max_frame_size +
2968                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2969                 min_tx = roundup2(min_tx, 1024);
2970                 min_tx >>= 10;
2971                 min_rx = adapter->max_frame_size;
2972                 min_rx = roundup2(min_rx, 1024);
2973                 min_rx >>= 10;
2974                 if (tx_space < min_tx &&
2975                     ((min_tx - tx_space) < pba)) {
2976                         pba = pba - (min_tx - tx_space);
2977                         /*
2978                          * if short on rx space, rx wins
2979                          * and must trump tx adjustment
2980                          */
2981                         if (pba < min_rx)
2982                                 pba = min_rx;
2983                 }
2984                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2985         }
2986
2987         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2988
2989         /*
2990          * These parameters control the automatic generation (Tx) and
2991          * response (Rx) to Ethernet PAUSE frames.
2992          * - High water mark should allow for at least two frames to be
2993          *   received after sending an XOFF.
2994          * - Low water mark works best when it is very near the high water mark.
2995          *   This allows the receiver to restart by sending XON when it has
2996          *   drained a bit.
2997          */
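             /* pba is in KB, so shift by 10 to work in bytes here */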
2998         hwm = min(((pba << 10) * 9 / 10),
2999             ((pba << 10) - 2 * adapter->max_frame_size));
3000
3001         if (hw->mac.type < e1000_82576) {
3002                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3003                 fc->low_water = fc->high_water - 8;
3004         } else {
3005                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3006                 fc->low_water = fc->high_water - 16;
3007         }
3008
3009         fc->pause_time = IGB_FC_PAUSE_TIME;
3010         fc->send_xon = TRUE;
3011         if (adapter->fc)
3012                 fc->requested_mode = adapter->fc;
3013         else
3014                 fc->requested_mode = e1000_fc_default;
3015
3016         /* Issue a global reset */
3017         e1000_reset_hw(hw);
3018         E1000_WRITE_REG(hw, E1000_WUC, 0);
3019
3020         if (e1000_init_hw(hw) < 0)
3021                 device_printf(dev, "Hardware Initialization Failed\n");
3022
3023         /* Setup DMA Coalescing */
3024         if ((hw->mac.type > e1000_82580) &&
3025             (hw->mac.type != e1000_i211)) {
3026                 u32 dmac;
3027                 u32 reg = ~E1000_DMACR_DMAC_EN;
3028
3029                 if (adapter->dmac == 0) { /* Disabling it */
3030                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
3031                         goto reset_out;
3032                 }
3033
3034                 /* Set starting thresholds */
3035                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3036                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3037
3038                 hwm = 64 * pba - adapter->max_frame_size / 16;
3039                 if (hwm < 64 * (pba - 6))
3040                         hwm = 64 * (pba - 6);
3041                 reg = E1000_READ_REG(hw, E1000_FCRTC);
3042                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3043                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3044                     & E1000_FCRTC_RTH_COAL_MASK);
3045                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3046
3047
3048                 dmac = pba - adapter->max_frame_size / 512;
3049                 if (dmac < pba - 10)
3050                         dmac = pba - 10;
3051                 reg = E1000_READ_REG(hw, E1000_DMACR);
3052                 reg &= ~E1000_DMACR_DMACTHR_MASK;
3053                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3054                     & E1000_DMACR_DMACTHR_MASK);
3055                 /* transition to L0s or L1 if available.. */
3056                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3057                 /* timer = value in adapter->dmac in 32usec intervals */
3058                 reg |= (adapter->dmac >> 5);
3059                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3060
3061                 /* Set the interval before transition */
3062                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3063                 reg |= 0x80000004;
3064                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3065
3066                 /* free space in tx packet buffer to wake from DMA coal */
3067                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
3068                     (20480 - (2 * adapter->max_frame_size)) >> 6);
3069
3070                 /* make low power state decision controlled by DMA coal */
3071                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3072                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3073                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3074                 device_printf(dev, "DMA Coalescing enabled\n");
3075
3076         } else if (hw->mac.type == e1000_82580) {
3077                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3078                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3079                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3080                     reg & ~E1000_PCIEMISC_LX_DECISION);
3081         }
3082
3083 reset_out:
3084         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3085         e1000_get_phy_info(hw);
3086         e1000_check_for_link(hw);
3087         return;
3088 }
3089
3090 /*********************************************************************
3091  *
3092  *  Setup networking device structure and register an interface.
3093  *
3094  **********************************************************************/
3095 static int
3096 igb_setup_interface(device_t dev, struct adapter *adapter)
3097 {
3098         struct ifnet   *ifp;
3099
3100         INIT_DEBUGOUT("igb_setup_interface: begin");
3101
3102         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3103         if (ifp == NULL) {
3104                 device_printf(dev, "can not allocate ifnet structure\n");
3105                 return (-1);
3106         }
3107         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3108         ifp->if_init =  igb_init;
3109         ifp->if_softc = adapter;
3110         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3111         ifp->if_ioctl = igb_ioctl;
3112 #ifndef IGB_LEGACY_TX
3113         ifp->if_transmit = igb_mq_start;
3114         ifp->if_qflush = igb_qflush;
3115 #else
3116         ifp->if_start = igb_start;
3117         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3118         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3119         IFQ_SET_READY(&ifp->if_snd);
3120 #endif
3121
3122         ether_ifattach(ifp, adapter->hw.mac.addr);
3123
3124         ifp->if_capabilities = ifp->if_capenable = 0;
3125
3126         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3127         ifp->if_capabilities |= IFCAP_TSO4;
3128         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3129         ifp->if_capenable = ifp->if_capabilities;
3130
3131         /* Don't enable LRO by default */
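             /* capenable was latched above, so LRO stays off until enabled via ifconfig */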
3132         ifp->if_capabilities |= IFCAP_LRO;
3133
3134 #ifdef DEVICE_POLLING
3135         ifp->if_capabilities |= IFCAP_POLLING;
3136 #endif
3137
3138         /*
3139          * Tell the upper layer(s) we
3140          * support full VLAN capability.
3141          */
3142         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3143         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3144                              |  IFCAP_VLAN_HWTSO
3145                              |  IFCAP_VLAN_MTU;
3146         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3147                           |  IFCAP_VLAN_HWTSO
3148                           |  IFCAP_VLAN_MTU;
3149
3150         /*
3151         ** Don't turn this on by default: if vlans are
3152         ** created on another pseudo device (e.g. lagg),
3153         ** vlan events are not passed through, which breaks
3154         ** operation, but with HW FILTER off it works. If
3155         ** using vlans directly on the igb driver you can
3156         ** enable this and get full hardware tag filtering.
3157         */
3158         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3159
3160         /*
3161          * Specify the media types supported by this adapter and register
3162          * callbacks to update media and link information
3163          */
3164         ifmedia_init(&adapter->media, IFM_IMASK,
3165             igb_media_change, igb_media_status);
3166         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3167             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3168                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3169                             0, NULL);
3170                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3171         } else {
3172                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3173                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3174                             0, NULL);
3175                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3176                             0, NULL);
3177                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3178                             0, NULL);
3179                 if (adapter->hw.phy.type != e1000_phy_ife) {
3180                         ifmedia_add(&adapter->media,
3181                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3182                         ifmedia_add(&adapter->media,
3183                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3184                 }
3185         }
3186         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3187         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3188         return (0);
3189 }
3190
3191
3192 /*
3193  * Manage DMA'able memory.
3194  */
3195 static void
3196 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3197 {
3198         if (error)
3199                 return;
3200         *(bus_addr_t *) arg = segs[0].ds_addr;
3201 }
3202
3203 static int
3204 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3205         struct igb_dma_alloc *dma, int mapflags)
3206 {
3207         int error;
3208
3209         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3210                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3211                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3212                                 BUS_SPACE_MAXADDR,      /* highaddr */
3213                                 NULL, NULL,             /* filter, filterarg */
3214                                 size,                   /* maxsize */
3215                                 1,                      /* nsegments */
3216                                 size,                   /* maxsegsize */
3217                                 0,                      /* flags */
3218                                 NULL,                   /* lockfunc */
3219                                 NULL,                   /* lockarg */
3220                                 &dma->dma_tag);
3221         if (error) {
3222                 device_printf(adapter->dev,
3223                     "%s: bus_dma_tag_create failed: %d\n",
3224                     __func__, error);
3225                 goto fail_0;
3226         }
3227
3228         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3229             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3230         if (error) {
3231                 device_printf(adapter->dev,
3232                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3233                     __func__, (uintmax_t)size, error);
3234                 goto fail_2;
3235         }
3236
3237         dma->dma_paddr = 0;
3238         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3239             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3240         if (error || dma->dma_paddr == 0) {
3241                 device_printf(adapter->dev,
3242                     "%s: bus_dmamap_load failed: %d\n",
3243                     __func__, error);
3244                 goto fail_3;
3245         }
3246
3247         return (0);
3248
3249 fail_3:
3250         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3251 fail_2:
3252         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3253         bus_dma_tag_destroy(dma->dma_tag);
3254 fail_0:
3255         dma->dma_map = NULL;
3256         dma->dma_tag = NULL;
3257
3258         return (error);
3259 }
3260
3261 static void
3262 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3263 {
3264         if (dma->dma_tag == NULL)
3265                 return;
3266         if (dma->dma_map != NULL) {
3267                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3268                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3269                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3270                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3271                 dma->dma_map = NULL;
3272         }
3273         bus_dma_tag_destroy(dma->dma_tag);
3274         dma->dma_tag = NULL;
3275 }
3276
3277
3278 /*********************************************************************
3279  *
3280  *  Allocate memory for the transmit and receive rings, and then
3281  *  the descriptors associated with each, called only once at attach.
3282  *
3283  **********************************************************************/
3284 static int
3285 igb_allocate_queues(struct adapter *adapter)
3286 {
3287         device_t dev = adapter->dev;
3288         struct igb_queue        *que = NULL;
3289         struct tx_ring          *txr = NULL;
3290         struct rx_ring          *rxr = NULL;
3291         int rsize, tsize, error = E1000_SUCCESS;
3292         int txconf = 0, rxconf = 0;
3293
3294         /* First allocate the top level queue structs */
3295         if (!(adapter->queues =
3296             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3297             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3298                 device_printf(dev, "Unable to allocate queue memory\n");
3299                 error = ENOMEM;
3300                 goto fail;
3301         }
3302
3303         /* Next allocate the TX ring struct memory */
3304         if (!(adapter->tx_rings =
3305             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3306             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3307                 device_printf(dev, "Unable to allocate TX ring memory\n");
3308                 error = ENOMEM;
3309                 goto tx_fail;
3310         }
3311
3312         /* Now allocate the RX */
3313         if (!(adapter->rx_rings =
3314             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3315             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3316                 device_printf(dev, "Unable to allocate RX ring memory\n");
3317                 error = ENOMEM;
3318                 goto rx_fail;
3319         }
3320
3321         tsize = roundup2(adapter->num_tx_desc *
3322             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
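             /* The roundup keeps the ring length a multiple of IGB_DBA_ALIGN, as the hardware expects */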
3323         /*
3324          * Now set up the TX queues; txconf is needed to handle the
3325          * possibility that things fail midcourse and we need to
3326          * undo the allocations gracefully.
3327          */ 
3328         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3329                 /* Set up some basics */
3330                 txr = &adapter->tx_rings[i];
3331                 txr->adapter = adapter;
3332                 txr->me = i;
3333
3334                 /* Initialize the TX lock */
3335                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3336                     device_get_nameunit(dev), txr->me);
3337                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3338
3339                 if (igb_dma_malloc(adapter, tsize,
3340                         &txr->txdma, BUS_DMA_NOWAIT)) {
3341                         device_printf(dev,
3342                             "Unable to allocate TX Descriptor memory\n");
3343                         error = ENOMEM;
3344                         goto err_tx_desc;
3345                 }
3346                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3347                 bzero((void *)txr->tx_base, tsize);
3348
3349                 /* Now allocate transmit buffers for the ring */
3350                 if (igb_allocate_transmit_buffers(txr)) {
3351                         device_printf(dev,
3352                             "Critical Failure setting up transmit buffers\n");
3353                         error = ENOMEM;
3354                         goto err_tx_desc;
3355                 }
3356 #ifndef IGB_LEGACY_TX
3357                 /* Allocate a buf ring */
3358                 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3359                     M_WAITOK, &txr->tx_mtx);
3360 #endif
3361         }
3362
3363         /*
3364          * Next the RX queues...
3365          */ 
3366         rsize = roundup2(adapter->num_rx_desc *
3367             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3368         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3369                 rxr = &adapter->rx_rings[i];
3370                 rxr->adapter = adapter;
3371                 rxr->me = i;
3372
3373                 /* Initialize the RX lock */
3374                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3375                     device_get_nameunit(dev), rxr->me);
3376                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3377
3378                 if (igb_dma_malloc(adapter, rsize,
3379                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3380                         device_printf(dev,
3381                             "Unable to allocate RX Descriptor memory\n");
3382                         error = ENOMEM;
3383                         goto err_rx_desc;
3384                 }
3385                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3386                 bzero((void *)rxr->rx_base, rsize);
3387
3388                 /* Allocate receive buffers for the ring*/
3389                 if (igb_allocate_receive_buffers(rxr)) {
3390                         device_printf(dev,
3391                             "Critical Failure setting up receive buffers\n");
3392                         error = ENOMEM;
3393                         goto err_rx_desc;
3394                 }
3395         }
3396
3397         /*
3398         ** Finally set up the queue holding structs
3399         */
3400         for (int i = 0; i < adapter->num_queues; i++) {
3401                 que = &adapter->queues[i];
3402                 que->adapter = adapter;
3403                 que->txr = &adapter->tx_rings[i];
3404                 que->rxr = &adapter->rx_rings[i];
3405         }
3406
3407         return (0);
3408
3409 err_rx_desc:
3410         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3411                 igb_dma_free(adapter, &rxr->rxdma);
3412 err_tx_desc:
3413         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3414                 igb_dma_free(adapter, &txr->txdma);
3415         free(adapter->rx_rings, M_DEVBUF);
3416 rx_fail:
3417 #ifndef IGB_LEGACY_TX
3418         buf_ring_free(txr->br, M_DEVBUF);
3419 #endif
3420         free(adapter->tx_rings, M_DEVBUF);
3421 tx_fail:
3422         free(adapter->queues, M_DEVBUF);
3423 fail:
3424         return (error);
3425 }
3426
3427 /*********************************************************************
3428  *
3429  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3430  *  the information needed to transmit a packet on the wire. This is
3431  *  called only once at attach, setup is done every reset.
3432  *
3433  **********************************************************************/
3434 static int
3435 igb_allocate_transmit_buffers(struct tx_ring *txr)
3436 {
3437         struct adapter *adapter = txr->adapter;
3438         device_t dev = adapter->dev;
3439         struct igb_tx_buffer *txbuf;
3440         int error, i;
3441
3442         /*
3443          * Setup DMA descriptor areas.
3444          */
3445         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3446                                1, 0,                    /* alignment, bounds */
3447                                BUS_SPACE_MAXADDR,       /* lowaddr */
3448                                BUS_SPACE_MAXADDR,       /* highaddr */
3449                                NULL, NULL,              /* filter, filterarg */
3450                                IGB_TSO_SIZE,            /* maxsize */
3451                                IGB_MAX_SCATTER,         /* nsegments */
3452                                PAGE_SIZE,               /* maxsegsize */
3453                                0,                       /* flags */
3454                                NULL,                    /* lockfunc */
3455                                NULL,                    /* lockfuncarg */
3456                                &txr->txtag))) {
3457                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3458                 goto fail;
3459         }
3460
3461         if (!(txr->tx_buffers =
3462             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3463             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3464                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3465                 error = ENOMEM;
3466                 goto fail;
3467         }
3468
3469         /* Create the descriptor buffer dma maps */
3470         txbuf = txr->tx_buffers;
3471         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3472                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3473                 if (error != 0) {
3474                         device_printf(dev, "Unable to create TX DMA map\n");
3475                         goto fail;
3476                 }
3477         }
3478
3479         return 0;
3480 fail:
3481         /* We free all; this handles the case where we are in the middle */
3482         igb_free_transmit_structures(adapter);
3483         return (error);
3484 }
3485
3486 /*********************************************************************
3487  *
3488  *  Initialize a transmit ring.
3489  *
3490  **********************************************************************/
3491 static void
3492 igb_setup_transmit_ring(struct tx_ring *txr)
3493 {
3494         struct adapter *adapter = txr->adapter;
3495         struct igb_tx_buffer *txbuf;
3496         int i;
3497 #ifdef DEV_NETMAP
3498         struct netmap_adapter *na = NA(adapter->ifp);
3499         struct netmap_slot *slot;
3500 #endif /* DEV_NETMAP */
3501
3502         /* Clear the old descriptor contents */
3503         IGB_TX_LOCK(txr);
3504 #ifdef DEV_NETMAP
3505         slot = netmap_reset(na, NR_TX, txr->me, 0);
3506 #endif /* DEV_NETMAP */
3507         bzero((void *)txr->tx_base,
3508               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3509         /* Reset indices */
3510         txr->next_avail_desc = 0;
3511         txr->next_to_clean = 0;
3512
3513         /* Free any existing tx buffers. */
3514         txbuf = txr->tx_buffers;
3515         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3516                 if (txbuf->m_head != NULL) {
3517                         bus_dmamap_sync(txr->txtag, txbuf->map,
3518                             BUS_DMASYNC_POSTWRITE);
3519                         bus_dmamap_unload(txr->txtag, txbuf->map);
3520                         m_freem(txbuf->m_head);
3521                         txbuf->m_head = NULL;
3522                 }
3523 #ifdef DEV_NETMAP
3524                 if (slot) {
3525                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3526                         /* no need to set the address */
3527                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3528                 }
3529 #endif /* DEV_NETMAP */
3530                 /* clear the watch index */
3531                 txbuf->next_eop = -1;
3532         }
3533
3534         /* Set number of descriptors available */
3535         txr->tx_avail = adapter->num_tx_desc;
3536
3537         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3538             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3539         IGB_TX_UNLOCK(txr);
3540 }
3541
3542 /*********************************************************************
3543  *
3544  *  Initialize all transmit rings.
3545  *
3546  **********************************************************************/
3547 static void
3548 igb_setup_transmit_structures(struct adapter *adapter)
3549 {
3550         struct tx_ring *txr = adapter->tx_rings;
3551
3552         for (int i = 0; i < adapter->num_queues; i++, txr++)
3553                 igb_setup_transmit_ring(txr);
3554
3555         return;
3556 }
3557
3558 /*********************************************************************
3559  *
3560  *  Enable transmit unit.
3561  *
3562  **********************************************************************/
3563 static void
3564 igb_initialize_transmit_units(struct adapter *adapter)
3565 {
3566         struct tx_ring  *txr = adapter->tx_rings;
3567         struct e1000_hw *hw = &adapter->hw;
3568         u32             tctl, txdctl;
3569
3570         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3571         tctl = txdctl = 0;
3572
3573         /* Setup the Tx Descriptor Rings */
3574         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3575                 u64 bus_addr = txr->txdma.dma_paddr;
3576
3577                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3578                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3579                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3580                     (uint32_t)(bus_addr >> 32));
3581                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3582                     (uint32_t)bus_addr);
3583
3584                 /* Setup the HW Tx Head and Tail descriptor pointers */
3585                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3586                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3587
3588                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3589                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3590                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3591
3592                 txr->queue_status = IGB_QUEUE_IDLE;
3593
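                     /* Prefetch, host, and write-back thresholds, then enable the ring */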
3594                 txdctl |= IGB_TX_PTHRESH;
3595                 txdctl |= IGB_TX_HTHRESH << 8;
3596                 txdctl |= IGB_TX_WTHRESH << 16;
3597                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3598                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3599         }
3600
3601         if (adapter->vf_ifp)
3602                 return;
3603
3604         e1000_config_collision_dist(hw);
3605
3606         /* Program the Transmit Control Register */
3607         tctl = E1000_READ_REG(hw, E1000_TCTL);
3608         tctl &= ~E1000_TCTL_CT;
3609         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3610                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3611
3612         /* This write will effectively turn on the transmit unit. */
3613         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3614 }
3615
3616 /*********************************************************************
3617  *
3618  *  Free all transmit rings.
3619  *
3620  **********************************************************************/
3621 static void
3622 igb_free_transmit_structures(struct adapter *adapter)
3623 {
3624         struct tx_ring *txr = adapter->tx_rings;
3625
3626         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3627                 IGB_TX_LOCK(txr);
3628                 igb_free_transmit_buffers(txr);
3629                 igb_dma_free(adapter, &txr->txdma);
3630                 IGB_TX_UNLOCK(txr);
3631                 IGB_TX_LOCK_DESTROY(txr);
3632         }
3633         free(adapter->tx_rings, M_DEVBUF);
3634 }
3635
3636 /*********************************************************************
3637  *
3638  *  Free transmit ring related data structures.
3639  *
3640  **********************************************************************/
3641 static void
3642 igb_free_transmit_buffers(struct tx_ring *txr)
3643 {
3644         struct adapter *adapter = txr->adapter;
3645         struct igb_tx_buffer *tx_buffer;
3646         int             i;
3647
3648         INIT_DEBUGOUT("free_transmit_ring: begin");
3649
3650         if (txr->tx_buffers == NULL)
3651                 return;
3652
3653         tx_buffer = txr->tx_buffers;
3654         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3655                 if (tx_buffer->m_head != NULL) {
3656                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3657                             BUS_DMASYNC_POSTWRITE);
3658                         bus_dmamap_unload(txr->txtag,
3659                             tx_buffer->map);
3660                         m_freem(tx_buffer->m_head);
3661                         tx_buffer->m_head = NULL;
3662                         if (tx_buffer->map != NULL) {
3663                                 bus_dmamap_destroy(txr->txtag,
3664                                     tx_buffer->map);
3665                                 tx_buffer->map = NULL;
3666                         }
3667                 } else if (tx_buffer->map != NULL) {
3668                         bus_dmamap_unload(txr->txtag,
3669                             tx_buffer->map);
3670                         bus_dmamap_destroy(txr->txtag,
3671                             tx_buffer->map);
3672                         tx_buffer->map = NULL;
3673                 }
3674         }
3675 #ifndef IGB_LEGACY_TX
3676         if (txr->br != NULL)
3677                 buf_ring_free(txr->br, M_DEVBUF);
3678 #endif
3679         if (txr->tx_buffers != NULL) {
3680                 free(txr->tx_buffers, M_DEVBUF);
3681                 txr->tx_buffers = NULL;
3682         }
3683         if (txr->txtag != NULL) {
3684                 bus_dma_tag_destroy(txr->txtag);
3685                 txr->txtag = NULL;
3686         }
3687         return;
3688 }
3689
3690 /**********************************************************************
3691  *
3692  *  Setup work for hardware segmentation offload (TSO)
3693  *
3694  **********************************************************************/
3695 static bool
3696 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3697         struct ip *ip, struct tcphdr *th)
3698 {
3699         struct adapter *adapter = txr->adapter;
3700         struct e1000_adv_tx_context_desc *TXD;
3701         struct igb_tx_buffer        *tx_buffer;
3702         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3703         u32 mss_l4len_idx = 0;
3704         u16 vtag = 0;
3705         int ctxd, ip_hlen, tcp_hlen;
3706
3707         ctxd = txr->next_avail_desc;
3708         tx_buffer = &txr->tx_buffers[ctxd];
3709         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3710
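             /* Zero the IP checksum; the hardware inserts a fresh one in each TSO segment */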
3711         ip->ip_sum = 0;
3712         ip_hlen = ip->ip_hl << 2;
3713         tcp_hlen = th->th_off << 2;
3714
3715         /* VLAN MACLEN IPLEN */
3716         if (mp->m_flags & M_VLANTAG) {
3717                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3718                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3719         }
3720
3721         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3722         vlan_macip_lens |= ip_hlen;
3723         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3724
3725         /* ADV DTYPE TUCMD */
3726         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3727         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3728         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3729         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3730
3731         /* MSS L4LEN IDX */
3732         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3733         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3734         /* 82575 needs the queue index added */
3735         if (adapter->hw.mac.type == e1000_82575)
3736                 mss_l4len_idx |= txr->me << 4;
3737         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3738
3739         TXD->seqnum_seed = htole32(0);
3740         tx_buffer->m_head = NULL;
3741         tx_buffer->next_eop = -1;
3742
3743         if (++ctxd == adapter->num_tx_desc)
3744                 ctxd = 0;
3745
3746         txr->tx_avail--;
3747         txr->next_avail_desc = ctxd;
3748         return TRUE;
3749 }
3750
3751
3752 /*********************************************************************
3753  *
3754  *  Context Descriptor setup for VLAN or CSUM
3755  *
3756  **********************************************************************/
3757
3758 static bool
3759 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3760 {
3761         struct adapter *adapter = txr->adapter;
3762         struct e1000_adv_tx_context_desc *TXD;
3763         struct igb_tx_buffer        *tx_buffer;
3764         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3765         struct ether_vlan_header *eh;
3766         struct ip *ip = NULL;
3767         struct ip6_hdr *ip6;
3768         int  ehdrlen, ctxd, ip_hlen = 0;
3769         u16     etype, vtag = 0;
3770         u8      ipproto = 0;
3771         bool    offload = TRUE;
3772
3773         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3774                 offload = FALSE;
3775
3776         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3777         ctxd = txr->next_avail_desc;
3778         tx_buffer = &txr->tx_buffers[ctxd];
3779         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3780
3781         /*
3782         ** In advanced descriptors the vlan tag must 
3783         ** be placed into the context descriptor, thus
3784         ** we need to be here just for that setup.
3785         */
3786         if (mp->m_flags & M_VLANTAG) {
3787                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3788                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3789         } else if (offload == FALSE)
3790                 return FALSE;
3791
3792         /*
3793          * Determine where frame payload starts.
3794          * Jump over vlan headers if already present,
3795          * helpful for QinQ too.
3796          */
3797         eh = mtod(mp, struct ether_vlan_header *);
3798         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3799                 etype = ntohs(eh->evl_proto);
3800                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3801         } else {
3802                 etype = ntohs(eh->evl_encap_proto);
3803                 ehdrlen = ETHER_HDR_LEN;
3804         }
3805
3806         /* Set the ether header length */
3807         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3808
3809         switch (etype) {
3810                 case ETHERTYPE_IP:
3811                         ip = (struct ip *)(mp->m_data + ehdrlen);
3812                         ip_hlen = ip->ip_hl << 2;
3813                         if (mp->m_len < ehdrlen + ip_hlen) {
3814                                 offload = FALSE;
3815                                 break;
3816                         }
3817                         ipproto = ip->ip_p;
3818                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3819                         break;
3820                 case ETHERTYPE_IPV6:
3821                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3822                         ip_hlen = sizeof(struct ip6_hdr);
3823                         ipproto = ip6->ip6_nxt;
3824                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3825                         break;
3826                 default:
3827                         offload = FALSE;
3828                         break;
3829         }
3830
3831         vlan_macip_lens |= ip_hlen;
3832         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3833
3834         switch (ipproto) {
3835                 case IPPROTO_TCP:
3836                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3837                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3838                         break;
3839                 case IPPROTO_UDP:
3840                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3841                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3842                         break;
3843 #if __FreeBSD_version >= 800000
3844                 case IPPROTO_SCTP:
3845                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3846                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3847                         break;
3848 #endif
3849                 default:
3850                         offload = FALSE;
3851                         break;
3852         }
3853
3854         /* 82575 needs the queue index added */
3855         if (adapter->hw.mac.type == e1000_82575)
3856                 mss_l4len_idx = txr->me << 4;
3857
3858         /* Now copy bits into descriptor */
3859         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3860         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3861         TXD->seqnum_seed = htole32(0);
3862         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3863
3864         tx_buffer->m_head = NULL;
3865         tx_buffer->next_eop = -1;
3866
3867         /* We've consumed the first desc, adjust counters */
3868         if (++ctxd == adapter->num_tx_desc)
3869                 ctxd = 0;
3870         txr->next_avail_desc = ctxd;
3871         --txr->tx_avail;
3872
3873         return (offload);
3874 }
3875
3876
3877 /**********************************************************************
3878  *
3879  *  Examine each tx_buffer in the used queue. If the hardware is done
3880  *  processing the packet then free associated resources. The
3881  *  tx_buffer is put back on the free queue.
3882  *
3883  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3884  **********************************************************************/
3885 static bool
3886 igb_txeof(struct tx_ring *txr)
3887 {
3888         struct adapter  *adapter = txr->adapter;
3889         int first, last, done, processed;
3890         struct igb_tx_buffer *tx_buffer;
3891         struct e1000_tx_desc   *tx_desc, *eop_desc;
3892         struct ifnet   *ifp = adapter->ifp;
3893
3894         IGB_TX_LOCK_ASSERT(txr);
3895
3896 #ifdef DEV_NETMAP
3897         if (netmap_tx_irq(ifp, txr->me |
3898             (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
3899                 return (FALSE);
3900 #endif /* DEV_NETMAP */
3901         if (txr->tx_avail == adapter->num_tx_desc) {
3902                 txr->queue_status = IGB_QUEUE_IDLE;
3903                 return FALSE;
3904         }
3905
3906         processed = 0;
3907         first = txr->next_to_clean;
3908         tx_desc = &txr->tx_base[first];
3909         tx_buffer = &txr->tx_buffers[first];
3910         last = tx_buffer->next_eop;
3911         eop_desc = &txr->tx_base[last];
3912
3913         /*
3914          * What this does is get the index of the
3915          * first descriptor AFTER the EOP of the 
3916          * first packet; that way we can do the
3917          * simple comparison in the inner while loop.
3918          */
3919         if (++last == adapter->num_tx_desc)
3920                 last = 0;
3921         done = last;
3922
3923         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3924             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3925
3926         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3927                 /* We clean the range of the packet */
3928                 while (first != done) {
3929                         tx_desc->upper.data = 0;
3930                         tx_desc->lower.data = 0;
3931                         tx_desc->buffer_addr = 0;
3932                         ++txr->tx_avail;
3933                         ++processed;
3934
3935                         if (tx_buffer->m_head) {
3936                                 txr->bytes +=
3937                                     tx_buffer->m_head->m_pkthdr.len;
3938                                 bus_dmamap_sync(txr->txtag,
3939                                     tx_buffer->map,
3940                                     BUS_DMASYNC_POSTWRITE);
3941                                 bus_dmamap_unload(txr->txtag,
3942                                     tx_buffer->map);
3943
3944                                 m_freem(tx_buffer->m_head);
3945                                 tx_buffer->m_head = NULL;
3946                         }
3947                         tx_buffer->next_eop = -1;
3948                         txr->watchdog_time = ticks;
3949
3950                         if (++first == adapter->num_tx_desc)
3951                                 first = 0;
3952
3953                         tx_buffer = &txr->tx_buffers[first];
3954                         tx_desc = &txr->tx_base[first];
3955                 }
3956                 ++txr->packets;
3957                 ++ifp->if_opackets;
3958                 /* See if we can continue to the next packet */
3959                 last = tx_buffer->next_eop;
3960                 if (last != -1) {
3961                         eop_desc = &txr->tx_base[last];
3962                         /* Get new done point */
3963                         if (++last == adapter->num_tx_desc) last = 0;
3964                         done = last;
3965                 } else
3966                         break;
3967         }
3968         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3969             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3970
3971         txr->next_to_clean = first;
3972
3973         /*
3974         ** Watchdog calculation: we know there's
3975         ** work outstanding or the first return
3976         ** would have been taken, so nothing processed
3977         ** for too long indicates a hang.
3978         */
3979         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3980                 txr->queue_status |= IGB_QUEUE_HUNG;
3981         /*
3982          * If we have a minimum free,
3983          * clear depleted state bit
3984          */
3985         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)          
3986                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3987
3988         /* All clean, turn off the watchdog */
3989         if (txr->tx_avail == adapter->num_tx_desc) {
3990                 txr->queue_status = IGB_QUEUE_IDLE;
3991                 return (FALSE);
3992         }
3993
3994         return (TRUE);
3995 }
3996
3997 /*********************************************************************
3998  *
3999  *  Refresh mbuf buffers for RX descriptor rings
4000  *   - now keeps its own state so discards due to resource
4001  *     exhaustion are unnecessary; if an mbuf cannot be obtained
4002  *     it just returns, keeping its placeholder, so it can simply
4003  *     be called again later to retry.
4004  *
4005  **********************************************************************/
4006 static void
4007 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4008 {
4009         struct adapter          *adapter = rxr->adapter;
4010         bus_dma_segment_t       hseg[1];
4011         bus_dma_segment_t       pseg[1];
4012         struct igb_rx_buf       *rxbuf;
4013         struct mbuf             *mh, *mp;
4014         int                     i, j, nsegs, error;
4015         bool                    refreshed = FALSE;
4016
4017         i = j = rxr->next_to_refresh;
4018         /*
4019         ** Get one descriptor beyond
4020         ** our work mark to control
4021         ** the loop.
4022         */
4023         if (++j == adapter->num_rx_desc)
4024                 j = 0;
4025
4026         while (j != limit) {
4027                 rxbuf = &rxr->rx_buffers[i];
4028                 /* No hdr mbuf used with header split off */
4029                 if (rxr->hdr_split == FALSE)
4030                         goto no_split;
4031                 if (rxbuf->m_head == NULL) {
4032                         mh = m_gethdr(M_NOWAIT, MT_DATA);
4033                         if (mh == NULL)
4034                                 goto update;
4035                 } else
4036                         mh = rxbuf->m_head;
4037
4038                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4039                 mh->m_len = MHLEN;
4040                 mh->m_flags |= M_PKTHDR;
4041                 /* Get the memory mapping */
4042                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4043                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4044                 if (error != 0) {
4045                         printf("Refresh mbufs: hdr dmamap load"
4046                             " failure - %d\n", error);
4047                         m_free(mh);
4048                         rxbuf->m_head = NULL;
4049                         goto update;
4050                 }
4051                 rxbuf->m_head = mh;
4052                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4053                     BUS_DMASYNC_PREREAD);
4054                 rxr->rx_base[i].read.hdr_addr =
4055                     htole64(hseg[0].ds_addr);
4056 no_split:
4057                 if (rxbuf->m_pack == NULL) {
4058                         mp = m_getjcl(M_NOWAIT, MT_DATA,
4059                             M_PKTHDR, adapter->rx_mbuf_sz);
4060                         if (mp == NULL)
4061                                 goto update;
4062                 } else
4063                         mp = rxbuf->m_pack;
4064
4065                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4066                 /* Get the memory mapping */
4067                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4068                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4069                 if (error != 0) {
4070                         printf("Refresh mbufs: payload dmamap load"
4071                             " failure - %d\n", error);
4072                         m_free(mp);
4073                         rxbuf->m_pack = NULL;
4074                         goto update;
4075                 }
4076                 rxbuf->m_pack = mp;
4077                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4078                     BUS_DMASYNC_PREREAD);
4079                 rxr->rx_base[i].read.pkt_addr =
4080                     htole64(pseg[0].ds_addr);
4081                 refreshed = TRUE; /* I feel wefreshed :) */
4082
4083                 i = j; /* our next is precalculated */
4084                 rxr->next_to_refresh = i;
4085                 if (++j == adapter->num_rx_desc)
4086                         j = 0;
4087         }
4088 update:
4089         if (refreshed) /* update tail */
4090                 E1000_WRITE_REG(&adapter->hw,
4091                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4092         return;
4093 }
4094
4095
4096 /*********************************************************************
4097  *
4098  *  Allocate memory for rx_buffer structures. Since we use one
4099  *  rx_buffer per received packet, the maximum number of rx_buffer's
4100  *  that we'll need is equal to the number of receive descriptors
4101  *  that we've allocated.
4102  *
4103  **********************************************************************/
4104 static int
4105 igb_allocate_receive_buffers(struct rx_ring *rxr)
4106 {
4107         struct  adapter         *adapter = rxr->adapter;
4108         device_t                dev = adapter->dev;
4109         struct igb_rx_buf       *rxbuf;
4110         int                     i, bsize, error;
4111
4112         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4113         if (!(rxr->rx_buffers =
4114             (struct igb_rx_buf *) malloc(bsize,
4115             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4116                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4117                 error = ENOMEM;
4118                 goto fail;
4119         }
4120
4121         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4122                                    1, 0,                /* alignment, bounds */
4123                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4124                                    BUS_SPACE_MAXADDR,   /* highaddr */
4125                                    NULL, NULL,          /* filter, filterarg */
4126                                    MSIZE,               /* maxsize */
4127                                    1,                   /* nsegments */
4128                                    MSIZE,               /* maxsegsize */
4129                                    0,                   /* flags */
4130                                    NULL,                /* lockfunc */
4131                                    NULL,                /* lockfuncarg */
4132                                    &rxr->htag))) {
4133                 device_printf(dev, "Unable to create RX DMA tag\n");
4134                 goto fail;
4135         }
4136
4137         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4138                                    1, 0,                /* alignment, bounds */
4139                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4140                                    BUS_SPACE_MAXADDR,   /* highaddr */
4141                                    NULL, NULL,          /* filter, filterarg */
4142                                    MJUM9BYTES,          /* maxsize */
4143                                    1,                   /* nsegments */
4144                                    MJUM9BYTES,          /* maxsegsize */
4145                                    0,                   /* flags */
4146                                    NULL,                /* lockfunc */
4147                                    NULL,                /* lockfuncarg */
4148                                    &rxr->ptag))) {
4149                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4150                 goto fail;
4151         }
4152
4153         for (i = 0; i < adapter->num_rx_desc; i++) {
4154                 rxbuf = &rxr->rx_buffers[i];
4155                 error = bus_dmamap_create(rxr->htag,
4156                     BUS_DMA_NOWAIT, &rxbuf->hmap);
4157                 if (error) {
4158                         device_printf(dev,
4159                             "Unable to create RX head DMA maps\n");
4160                         goto fail;
4161                 }
4162                 error = bus_dmamap_create(rxr->ptag,
4163                     BUS_DMA_NOWAIT, &rxbuf->pmap);
4164                 if (error) {
4165                         device_printf(dev,
4166                             "Unable to create RX packet DMA maps\n");
4167                         goto fail;
4168                 }
4169         }
4170
4171         return (0);
4172
4173 fail:
4174         /* Frees all, but can handle partial completion */
4175         igb_free_receive_structures(adapter);
4176         return (error);
4177 }
4178
4179
4180 static void
4181 igb_free_receive_ring(struct rx_ring *rxr)
4182 {
4183         struct  adapter         *adapter = rxr->adapter;
4184         struct igb_rx_buf       *rxbuf;
4185
4186
4187         for (int i = 0; i < adapter->num_rx_desc; i++) {
4188                 rxbuf = &rxr->rx_buffers[i];
4189                 if (rxbuf->m_head != NULL) {
4190                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4191                             BUS_DMASYNC_POSTREAD);
4192                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4193                         rxbuf->m_head->m_flags |= M_PKTHDR;
4194                         m_freem(rxbuf->m_head);
4195                 }
4196                 if (rxbuf->m_pack != NULL) {
4197                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4198                             BUS_DMASYNC_POSTREAD);
4199                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4200                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4201                         m_freem(rxbuf->m_pack);
4202                 }
4203                 rxbuf->m_head = NULL;
4204                 rxbuf->m_pack = NULL;
4205         }
4206 }
4207
4208
4209 /*********************************************************************
4210  *
4211  *  Initialize a receive ring and its buffers.
4212  *
4213  **********************************************************************/
4214 static int
4215 igb_setup_receive_ring(struct rx_ring *rxr)
4216 {
4217         struct  adapter         *adapter;
4218         struct  ifnet           *ifp;
4219         device_t                dev;
4220         struct igb_rx_buf       *rxbuf;
4221         bus_dma_segment_t       pseg[1], hseg[1];
4222         struct lro_ctrl         *lro = &rxr->lro;
4223         int                     rsize, nsegs, error = 0;
4224 #ifdef DEV_NETMAP
4225         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4226         struct netmap_slot *slot;
4227 #endif /* DEV_NETMAP */
4228
4229         adapter = rxr->adapter;
4230         dev = adapter->dev;
4231         ifp = adapter->ifp;
4232
4233         /* Clear the ring contents */
4234         IGB_RX_LOCK(rxr);
4235 #ifdef DEV_NETMAP
4236         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4237 #endif /* DEV_NETMAP */
4238         rsize = roundup2(adapter->num_rx_desc *
4239             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4240         bzero((void *)rxr->rx_base, rsize);
4241
4242         /*
4243         ** Free current RX buffer structures and their mbufs
4244         */
4245         igb_free_receive_ring(rxr);
4246
4247         /* Configure for header split? */
4248         if (igb_header_split)
4249                 rxr->hdr_split = TRUE;
4250
4251         /* Now replenish the ring mbufs */
4252         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4253                 struct mbuf     *mh, *mp;
4254
4255                 rxbuf = &rxr->rx_buffers[j];
4256 #ifdef DEV_NETMAP
4257                 if (slot) {
4258                         /* slot sj is mapped to the i-th NIC-ring entry */
4259                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4260                         uint64_t paddr;
4261                         void *addr;
4262
4263                         addr = PNMB(slot + sj, &paddr);
4264                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4265                         /* Update descriptor */
4266                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4267                         continue;
4268                 }
4269 #endif /* DEV_NETMAP */
4270                 if (rxr->hdr_split == FALSE)
4271                         goto skip_head;
4272
4273                 /* First the header */
4274                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4275                 if (rxbuf->m_head == NULL) {
4276                         error = ENOBUFS;
4277                         goto fail;
4278                 }
4279                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4280                 mh = rxbuf->m_head;
4281                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4282                 mh->m_flags |= M_PKTHDR;
4283                 /* Get the memory mapping */
4284                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4285                     rxbuf->hmap, rxbuf->m_head, hseg,
4286                     &nsegs, BUS_DMA_NOWAIT);
4287                 if (error != 0) /* Nothing elegant to do here */
4288                         goto fail;
4289                 bus_dmamap_sync(rxr->htag,
4290                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4291                 /* Update descriptor */
4292                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4293
4294 skip_head:
4295                 /* Now the payload cluster */
4296                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4297                     M_PKTHDR, adapter->rx_mbuf_sz);
4298                 if (rxbuf->m_pack == NULL) {
4299                         error = ENOBUFS;
4300                         goto fail;
4301                 }
4302                 mp = rxbuf->m_pack;
4303                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4304                 /* Get the memory mapping */
4305                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4306                     rxbuf->pmap, mp, pseg,
4307                     &nsegs, BUS_DMA_NOWAIT);
4308                 if (error != 0)
4309                         goto fail;
4310                 bus_dmamap_sync(rxr->ptag,
4311                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4312                 /* Update descriptor */
4313                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4314         }
4315
4316         /* Setup our descriptor indices */
4317         rxr->next_to_check = 0;
4318         rxr->next_to_refresh = adapter->num_rx_desc - 1;
4319         rxr->lro_enabled = FALSE;
4320         rxr->rx_split_packets = 0;
4321         rxr->rx_bytes = 0;
4322
4323         rxr->fmp = NULL;
4324         rxr->lmp = NULL;
4325         rxr->discard = FALSE;
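        /*
         * Index semantics as used by this driver: next_to_check is the
         * first descriptor the cleanup loop will examine for a completed
         * writeback, while next_to_refresh is the last slot already
         * populated with fresh mbufs, so RDT is written to it and the
         * refresh path resumes at the slot that follows it.
         */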
4326
4327         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4328             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4329
4330         /*
4331         ** Now set up the LRO interface. Header split
4332         ** is only done when LRO is enabled, since on
4333         ** its own it is so often undesirable in
4334         ** similar setups.
4335         */
4336         if (ifp->if_capenable & IFCAP_LRO) {
4337                 error = tcp_lro_init(lro);
4338                 if (error) {
4339                         device_printf(dev, "LRO Initialization failed!\n");
4340                         goto fail;
4341                 }
4342                 INIT_DEBUGOUT("RX LRO Initialized\n");
4343                 rxr->lro_enabled = TRUE;
4344                 lro->ifp = adapter->ifp;
4345         }
4346
4347         IGB_RX_UNLOCK(rxr);
4348         return (0);
4349
4350 fail:
4351         igb_free_receive_ring(rxr);
4352         IGB_RX_UNLOCK(rxr);
4353         return (error);
4354 }
4355
4356
4357 /*********************************************************************
4358  *
4359  *  Initialize all receive rings.
4360  *
4361  **********************************************************************/
4362 static int
4363 igb_setup_receive_structures(struct adapter *adapter)
4364 {
4365         struct rx_ring *rxr = adapter->rx_rings;
4366         int i;
4367
4368         for (i = 0; i < adapter->num_queues; i++, rxr++)
4369                 if (igb_setup_receive_ring(rxr))
4370                         goto fail;
4371
4372         return (0);
4373 fail:
4374         /*
4375          * Free RX buffers allocated so far, we will only handle
4376          * the rings that completed, the failing case will have
4377          * cleaned up for itself. 'i' is the endpoint.
4378          */
4379         for (int j = 0; j < i; ++j) {
4380                 rxr = &adapter->rx_rings[j];
4381                 IGB_RX_LOCK(rxr);
4382                 igb_free_receive_ring(rxr);
4383                 IGB_RX_UNLOCK(rxr);
4384         }
4385
4386         return (ENOBUFS);
4387 }
4388
4389 /*********************************************************************
4390  *
4391  *  Enable receive unit.
4392  *
4393  **********************************************************************/
4394 static void
4395 igb_initialize_receive_units(struct adapter *adapter)
4396 {
4397         struct rx_ring  *rxr = adapter->rx_rings;
4398         struct ifnet    *ifp = adapter->ifp;
4399         struct e1000_hw *hw = &adapter->hw;
4400         u32             rctl, rxcsum, psize, srrctl = 0;
4401
4402         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4403
4404         /*
4405          * Make sure receives are disabled while setting
4406          * up the descriptor ring
4407          */
4408         rctl = E1000_READ_REG(hw, E1000_RCTL);
4409         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4410
4411         /*
4412         ** Set up for header split
4413         */
4414         if (igb_header_split) {
4415                 /* Use a standard mbuf for the header */
4416                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4417                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4418         } else
4419                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4420
4421         /*
4422         ** Set up for jumbo frames
4423         */
4424         if (ifp->if_mtu > ETHERMTU) {
4425                 rctl |= E1000_RCTL_LPE;
4426                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4427                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4428                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4429                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4430                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4431                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4432                 }
4433                 /* Set maximum packet len */
4434                 psize = adapter->max_frame_size;
4435                 /* are we on a vlan? */
4436                 if (adapter->ifp->if_vlantrunk != NULL)
4437                         psize += VLAN_TAG_SIZE;
4438                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4439         } else {
4440                 rctl &= ~E1000_RCTL_LPE;
4441                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4442                 rctl |= E1000_RCTL_SZ_2048;
4443         }
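        /*
         * Example of the SRRCTL buffer-size encoding above (a sketch,
         * assuming E1000_SRRCTL_BSIZEPKT_SHIFT is 10, i.e. the field is
         * expressed in 1 KB units): 2048 >> 10 = 2 for 2 KB buffers,
         * 4096 >> 10 = 4, and 8192 >> 10 = 8.
         */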
4444
4445         /* Setup the Base and Length of the Rx Descriptor Rings */
4446         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4447                 u64 bus_addr = rxr->rxdma.dma_paddr;
4448                 u32 rxdctl;
4449
4450                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4451                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4452                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4453                     (uint32_t)(bus_addr >> 32));
4454                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4455                     (uint32_t)bus_addr);
4456                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4457                 /* Enable this Queue */
4458                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4459                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4460                 rxdctl &= 0xFFF00000;
4461                 rxdctl |= IGB_RX_PTHRESH;
4462                 rxdctl |= IGB_RX_HTHRESH << 8;
4463                 rxdctl |= IGB_RX_WTHRESH << 16;
4464                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4465         }
4466
4467         /*
4468         ** Setup for RX MultiQueue
4469         */
4470         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4471         if (adapter->num_queues > 1) {
4472                 u32 random[10], mrqc, shift = 0;
4473                 union igb_reta {
4474                         u32 dword;
4475                         u8  bytes[4];
4476                 } reta;
4477
4478                 arc4rand(&random, sizeof(random), 0);
4479                 if (adapter->hw.mac.type == e1000_82575)
4480                         shift = 6;
4481                 /* Fill the 128-entry RETA, four one-byte entries per register */
4482                 for (int i = 0; i < 128; i++) {
4483                         reta.bytes[i & 3] =
4484                             (i % adapter->num_queues) << shift;
4485                         if ((i & 3) == 3)
4486                                 E1000_WRITE_REG(hw,
4487                                     E1000_RETA(i >> 2), reta.dword);
4488                 }
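                /*
                 * RETA packing example: 128 one-byte entries, four per
                 * 32-bit register, so E1000_RETA(i >> 2) is written once
                 * every fourth entry. With num_queues == 4 the entries
                 * cycle 0,1,2,3,...; on 82575 the queue index is assumed
                 * to live in bits 7:6 of each entry, hence shift == 6.
                 */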
4489                 /* Now fill in hash table */
4490                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4491                 for (int i = 0; i < 10; i++)
4492                         E1000_WRITE_REG_ARRAY(hw,
4493                             E1000_RSSRK(0), i, random[i]);
4494
4495                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4496                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4497                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4498                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4499                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4500                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4501                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4502                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4503
4504                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4505
4506                 /*
4507                 ** NOTE: Receive Full-Packet Checksum Offload
4508                 ** is mutually exclusive with Multiqueue. However,
4509                 ** this is not the same as the TCP/IP checksum
4510                 ** offloads, which still work.
4511                 */
4512                 rxcsum |= E1000_RXCSUM_PCSD;
4513 #if __FreeBSD_version >= 800000
4514                 /* For SCTP Offload */
4515                 if ((hw->mac.type == e1000_82576)
4516                     && (ifp->if_capenable & IFCAP_RXCSUM))
4517                         rxcsum |= E1000_RXCSUM_CRCOFL;
4518 #endif
4519         } else {
4520                 /* Non RSS setup */
4521                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4522                         rxcsum |= E1000_RXCSUM_IPPCSE;
4523 #if __FreeBSD_version >= 800000
4524                         if (adapter->hw.mac.type == e1000_82576)
4525                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4526 #endif
4527                 } else
4528                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4529         }
4530         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4531
4532         /* Setup the Receive Control Register */
4533         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4534         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4535                    E1000_RCTL_RDMTS_HALF |
4536                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4537         /* Strip CRC bytes. */
4538         rctl |= E1000_RCTL_SECRC;
4539         /* Make sure VLAN Filters are off */
4540         rctl &= ~E1000_RCTL_VFE;
4541         /* Don't store bad packets */
4542         rctl &= ~E1000_RCTL_SBP;
4543
4544         /* Enable Receives */
4545         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4546
4547         /*
4548          * Setup the HW Rx Head and Tail Descriptor Pointers
4549          *   - needs to be after enable
4550          */
4551         for (int i = 0; i < adapter->num_queues; i++) {
4552                 rxr = &adapter->rx_rings[i];
4553                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4554 #ifdef DEV_NETMAP
4555                 /*
4556                  * An init() while a netmap client is active must
4557                  * preserve the rx buffers passed to userspace.
4558                  * In this driver it means we adjust RDT to
4559                  * something different from next_to_refresh
4560                  * (which is not used in netmap mode).
4561                  */
4562                 if (ifp->if_capenable & IFCAP_NETMAP) {
4563                         struct netmap_adapter *na = NA(adapter->ifp);
4564                         struct netmap_kring *kring = &na->rx_rings[i];
4565                         int t = rxr->next_to_refresh - kring->nr_hwavail;
4566
4567                         if (t >= adapter->num_rx_desc)
4568                                 t -= adapter->num_rx_desc;
4569                         else if (t < 0)
4570                                 t += adapter->num_rx_desc;
4571                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4572                 } else
4573 #endif /* DEV_NETMAP */
4574                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4575         }
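        /*
         * After this loop RDH holds the slot the cleanup code will check
         * first and RDT the last slot already stocked with an mbuf; the
         * hardware only fills descriptors it owns, between head and tail,
         * so it stops before buffers the driver has not yet replenished.
         */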
4576         return;
4577 }
4578
4579 /*********************************************************************
4580  *
4581  *  Free receive rings.
4582  *
4583  **********************************************************************/
4584 static void
4585 igb_free_receive_structures(struct adapter *adapter)
4586 {
4587         struct rx_ring *rxr = adapter->rx_rings;
4588
4589         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4590                 struct lro_ctrl *lro = &rxr->lro;
4591                 igb_free_receive_buffers(rxr);
4592                 tcp_lro_free(lro);
4593                 igb_dma_free(adapter, &rxr->rxdma);
4594         }
4595
4596         free(adapter->rx_rings, M_DEVBUF);
4597 }
4598
4599 /*********************************************************************
4600  *
4601  *  Free receive ring data structures.
4602  *
4603  **********************************************************************/
4604 static void
4605 igb_free_receive_buffers(struct rx_ring *rxr)
4606 {
4607         struct adapter          *adapter = rxr->adapter;
4608         struct igb_rx_buf       *rxbuf;
4609         int i;
4610
4611         INIT_DEBUGOUT("free_receive_structures: begin");
4612
4613         /* Cleanup any existing buffers */
4614         if (rxr->rx_buffers != NULL) {
4615                 for (i = 0; i < adapter->num_rx_desc; i++) {
4616                         rxbuf = &rxr->rx_buffers[i];
4617                         if (rxbuf->m_head != NULL) {
4618                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4619                                     BUS_DMASYNC_POSTREAD);
4620                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4621                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4622                                 m_freem(rxbuf->m_head);
4623                         }
4624                         if (rxbuf->m_pack != NULL) {
4625                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4626                                     BUS_DMASYNC_POSTREAD);
4627                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4628                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4629                                 m_freem(rxbuf->m_pack);
4630                         }
4631                         rxbuf->m_head = NULL;
4632                         rxbuf->m_pack = NULL;
4633                         if (rxbuf->hmap != NULL) {
4634                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4635                                 rxbuf->hmap = NULL;
4636                         }
4637                         if (rxbuf->pmap != NULL) {
4638                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4639                                 rxbuf->pmap = NULL;
4640                         }
4641                 }
4642                 if (rxr->rx_buffers != NULL) {
4643                         free(rxr->rx_buffers, M_DEVBUF);
4644                         rxr->rx_buffers = NULL;
4645                 }
4646         }
4647
4648         if (rxr->htag != NULL) {
4649                 bus_dma_tag_destroy(rxr->htag);
4650                 rxr->htag = NULL;
4651         }
4652         if (rxr->ptag != NULL) {
4653                 bus_dma_tag_destroy(rxr->ptag);
4654                 rxr->ptag = NULL;
4655         }
4656 }
4657
4658 static __inline void
4659 igb_rx_discard(struct rx_ring *rxr, int i)
4660 {
4661         struct igb_rx_buf       *rbuf;
4662
4663         rbuf = &rxr->rx_buffers[i];
4664
4665         /* Partially received? Free the chain */
4666         if (rxr->fmp != NULL) {
4667                 rxr->fmp->m_flags |= M_PKTHDR;
4668                 m_freem(rxr->fmp);
4669                 rxr->fmp = NULL;
4670                 rxr->lmp = NULL;
4671         }
4672
4673         /*
4674         ** With advanced descriptors the writeback
4675         ** clobbers the buffer addrs, so it's easier
4676         ** to just free the existing mbufs and take
4677         ** the normal refresh path to get new buffers
4678         ** and mappings.
4679         */
4680         if (rbuf->m_head) {
4681                 m_free(rbuf->m_head);
4682                 rbuf->m_head = NULL;
4683         }
4684
4685         if (rbuf->m_pack) {
4686                 m_free(rbuf->m_pack);
4687                 rbuf->m_pack = NULL;
4688         }
4689
4690         return;
4691 }
4692
4693 static __inline void
4694 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4695 {
4696
4697         /*
4698          * At the moment LRO is only for IPv4/TCP packets whose TCP
4699          * checksum has been verified by hardware, and which carry no
4700          * VLAN tag in the Ethernet header.
4701          */
4702         if (rxr->lro_enabled &&
4703             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4704             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4705             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4706             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4707             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4708             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4709                 /*
4710                  * Fall through and send to the stack if:
4711                  *  - there are no LRO resources, or
4712                  *  - the LRO enqueue fails.
4713                  * (LRO not enabled is handled by the test above.)
4714                  */
4715                 if (rxr->lro.lro_cnt != 0)
4716                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4717                                 return;
4718         }
4719         IGB_RX_UNLOCK(rxr);
4720         (*ifp->if_input)(ifp, m);
4721         IGB_RX_LOCK(rxr);
4722 }
4723
4724 /*********************************************************************
4725  *
4726  *  This routine executes in interrupt context. It replenishes
4727  *  the mbufs in the descriptor ring and sends data that has been
4728  *  DMA'd into host memory up to the upper layer.
4729  *
4730  *  We loop at most count times if count is > 0, or until done if
4731  *  count < 0.
4732  *
4733  *  Return TRUE if more to clean, FALSE otherwise
4734  *********************************************************************/
4735 static bool
4736 igb_rxeof(struct igb_queue *que, int count, int *done)
4737 {
4738         struct adapter          *adapter = que->adapter;
4739         struct rx_ring          *rxr = que->rxr;
4740         struct ifnet            *ifp = adapter->ifp;
4741         struct lro_ctrl         *lro = &rxr->lro;
4742         struct lro_entry        *queued;
4743         int                     i, processed = 0, rxdone = 0;
4744         u32                     ptype, staterr = 0;
4745         union e1000_adv_rx_desc *cur;
4746
4747         IGB_RX_LOCK(rxr);
4748         /* Sync the ring. */
4749         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4750             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4751
4752 #ifdef DEV_NETMAP
4753         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4754                 return (FALSE);
4755 #endif /* DEV_NETMAP */
4756
4757         /* Main clean loop */
4758         for (i = rxr->next_to_check; count != 0;) {
4759                 struct mbuf             *sendmp, *mh, *mp;
4760                 struct igb_rx_buf       *rxbuf;
4761                 u16                     hlen, plen, hdr, vtag;
4762                 bool                    eop = FALSE;
4763  
4764                 cur = &rxr->rx_base[i];
4765                 staterr = le32toh(cur->wb.upper.status_error);
4766                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4767                         break;
4768                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4769                         break;
4770                 count--;
4771                 sendmp = mh = mp = NULL;
4772                 cur->wb.upper.status_error = 0;
4773                 rxbuf = &rxr->rx_buffers[i];
4774                 plen = le16toh(cur->wb.upper.length);
4775                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4776                 if ((adapter->hw.mac.type == e1000_i350) &&
4777                     (staterr & E1000_RXDEXT_STATERR_LB))
4778                         vtag = be16toh(cur->wb.upper.vlan);
4779                 else
4780                         vtag = le16toh(cur->wb.upper.vlan);
4781                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4782                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4783
4784                 /* Make sure all segments of a bad packet are discarded */
4785                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4786                     (rxr->discard)) {
4787                         adapter->dropped_pkts++;
4788                         ++rxr->rx_discarded;
4789                         if (!eop) /* Catch subsequent segs */
4790                                 rxr->discard = TRUE;
4791                         else
4792                                 rxr->discard = FALSE;
4793                         igb_rx_discard(rxr, i);
4794                         goto next_desc;
4795                 }
4796
4797                 /*
4798                 ** The way the hardware is configured to
4799                 ** split, it will ONLY use the header buffer
4800                 ** when header split is enabled; otherwise we
4801                 ** get normal behavior, i.e., both header and
4802                 ** payload are DMA'd into the payload buffer.
4803                 **
4804                 ** The fmp test catches the case where a
4805                 ** packet spans multiple descriptors; in that
4806                 ** case only the first header is valid.
4807                 */
4808                 if (rxr->hdr_split && rxr->fmp == NULL) {
4809                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4810                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4811                         if (hlen > IGB_HDR_BUF)
4812                                 hlen = IGB_HDR_BUF;
4813                         mh = rxr->rx_buffers[i].m_head;
4814                         mh->m_len = hlen;
4815                         /* clear buf pointer for refresh */
4816                         rxbuf->m_head = NULL;
4817                         /*
4818                         ** Get the payload length; this
4819                         ** could be zero if it's a small
4820                         ** packet.
4821                         */
4822                         if (plen > 0) {
4823                                 mp = rxr->rx_buffers[i].m_pack;
4824                                 mp->m_len = plen;
4825                                 mh->m_next = mp;
4826                                 /* clear buf pointer */
4827                                 rxbuf->m_pack = NULL;
4828                                 rxr->rx_split_packets++;
4829                         }
4830                 } else {
4831                         /*
4832                         ** Either no header split, or a
4833                         ** secondary piece of a fragmented
4834                         ** split packet.
4835                         */
4836                         mh = rxr->rx_buffers[i].m_pack;
4837                         mh->m_len = plen;
4838                         /* clear buf info for refresh */
4839                         rxbuf->m_pack = NULL;
4840                 }
4841
4842                 ++processed; /* So we know when to refresh */
4843
4844                 /* Initial frame - setup */
4845                 if (rxr->fmp == NULL) {
4846                         mh->m_pkthdr.len = mh->m_len;
4847                         /* Save the head of the chain */
4848                         rxr->fmp = mh;
4849                         rxr->lmp = mh;
4850                         if (mp != NULL) {
4851                                 /* Add payload if split */
4852                                 mh->m_pkthdr.len += mp->m_len;
4853                                 rxr->lmp = mh->m_next;
4854                         }
4855                 } else {
4856                         /* Chain mbuf's together */
4857                         rxr->lmp->m_next = mh;
4858                         rxr->lmp = rxr->lmp->m_next;
4859                         rxr->fmp->m_pkthdr.len += mh->m_len;
4860                 }
4861
4862                 if (eop) {
4863                         rxr->fmp->m_pkthdr.rcvif = ifp;
4864                         ifp->if_ipackets++;
4865                         rxr->rx_packets++;
4866                         /* capture data for AIM */
4867                         rxr->packets++;
4868                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4869                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4870
4871                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4872                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4873
4874                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4875                             (staterr & E1000_RXD_STAT_VP) != 0) {
4876                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4877                                 rxr->fmp->m_flags |= M_VLANTAG;
4878                         }
4879 #ifndef IGB_LEGACY_TX
4880                         rxr->fmp->m_pkthdr.flowid = que->msix;
4881                         rxr->fmp->m_flags |= M_FLOWID;
4882 #endif
4883                         sendmp = rxr->fmp;
4884                         /* Make sure to set M_PKTHDR. */
4885                         sendmp->m_flags |= M_PKTHDR;
4886                         rxr->fmp = NULL;
4887                         rxr->lmp = NULL;
4888                 }
4889
4890 next_desc:
4891                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4892                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4893
4894                 /* Advance our pointers to the next descriptor. */
4895                 if (++i == adapter->num_rx_desc)
4896                         i = 0;
4897                 /*
4898                 ** Send to the stack or LRO
4899                 */
4900                 if (sendmp != NULL) {
4901                         rxr->next_to_check = i;
4902                         igb_rx_input(rxr, ifp, sendmp, ptype);
4903                         i = rxr->next_to_check;
4904                         rxdone++;
4905                 }
4906
4907                 /* Every 8 descriptors we go to refresh mbufs */
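                /*
                 * Batching the refresh amortizes the RDT tail update (an
                 * MMIO write done in igb_refresh_mbufs) over several
                 * buffers instead of paying for it on every packet.
                 */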
4908                 if (processed == 8) {
4909                         igb_refresh_mbufs(rxr, i);
4910                         processed = 0;
4911                 }
4912         }
4913
4914         /* Catch any remainders */
4915         if (igb_rx_unrefreshed(rxr))
4916                 igb_refresh_mbufs(rxr, i);
4917
4918         rxr->next_to_check = i;
4919
4920         /*
4921          * Flush any outstanding LRO work
4922          */
4923         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4924                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4925                 tcp_lro_flush(lro, queued);
4926         }
4927
4928         if (done != NULL)
4929                 *done += rxdone;
4930
4931         IGB_RX_UNLOCK(rxr);
4932         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4933 }
4934
4935 /*********************************************************************
4936  *
4937  *  Verify that the hardware indicated that the checksum is valid.
4938  *  Inform the stack about the status of the checksum so that the
4939  *  stack doesn't spend time verifying it again.
4940  *
4941  *********************************************************************/
4942 static void
4943 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4944 {
4945         u16 status = (u16)staterr;
4946         u8  errors = (u8) (staterr >> 24);
4947         int sctp;
4948
4949         /* Ignore Checksum bit is set */
4950         if (status & E1000_RXD_STAT_IXSM) {
4951                 mp->m_pkthdr.csum_flags = 0;
4952                 return;
4953         }
4954
4955         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4956             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4957                 sctp = 1;
4958         else
4959                 sctp = 0;
4960         if (status & E1000_RXD_STAT_IPCS) {
4961                 /* Did it pass? */
4962                 if (!(errors & E1000_RXD_ERR_IPE)) {
4963                         /* IP Checksum Good */
4964                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4965                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4966                 } else
4967                         mp->m_pkthdr.csum_flags = 0;
4968         }
4969
4970         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4971                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4972 #if __FreeBSD_version >= 800000
4973                 if (sctp) /* reassign */
4974                         type = CSUM_SCTP_VALID;
4975 #endif
4976                 /* Did it pass? */
4977                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4978                         mp->m_pkthdr.csum_flags |= type;
4979                         if (sctp == 0)
4980                                 mp->m_pkthdr.csum_data = htons(0xffff);
4981                 }
4982         }
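        /*
         * To the stack, CSUM_IP_CHECKED|CSUM_IP_VALID mean the IP header
         * checksum was verified, while CSUM_DATA_VALID|CSUM_PSEUDO_HDR
         * with csum_data = 0xffff mean the L4 checksum (including the
         * pseudo-header) needs no further verification.
         */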
4983         return;
4984 }
4985
4986 /*
4987  * This routine is run via a vlan
4988  * config EVENT.
4989  */
4990 static void
4991 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4992 {
4993         struct adapter  *adapter = ifp->if_softc;
4994         u32             index, bit;
4995
4996         if (ifp->if_softc !=  arg)   /* Not our event */
4997                 return;
4998
4999         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5000                 return;
5001
5002         IGB_CORE_LOCK(adapter);
5003         index = (vtag >> 5) & 0x7F;
5004         bit = vtag & 0x1F;
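        /*
         * The shadow VFTA is 128 32-bit words covering all 4096 VLAN IDs.
         * For example, vtag 100 maps to index 3 (100 >> 5), bit 4 (100 & 0x1F).
         */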
5005         adapter->shadow_vfta[index] |= (1 << bit);
5006         ++adapter->num_vlans;
5007         /* Change hw filter setting */
5008         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5009                 igb_setup_vlan_hw_support(adapter);
5010         IGB_CORE_UNLOCK(adapter);
5011 }
5012
5013 /*
5014  * This routine is run via a vlan
5015  * unconfig EVENT.
5016  */
5017 static void
5018 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5019 {
5020         struct adapter  *adapter = ifp->if_softc;
5021         u32             index, bit;
5022
5023         if (ifp->if_softc !=  arg)
5024                 return;
5025
5026         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5027                 return;
5028
5029         IGB_CORE_LOCK(adapter);
5030         index = (vtag >> 5) & 0x7F;
5031         bit = vtag & 0x1F;
5032         adapter->shadow_vfta[index] &= ~(1 << bit);
5033         --adapter->num_vlans;
5034         /* Change hw filter setting */
5035         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5036                 igb_setup_vlan_hw_support(adapter);
5037         IGB_CORE_UNLOCK(adapter);
5038 }
5039
5040 static void
5041 igb_setup_vlan_hw_support(struct adapter *adapter)
5042 {
5043         struct e1000_hw *hw = &adapter->hw;
5044         struct ifnet    *ifp = adapter->ifp;
5045         u32             reg;
5046
5047         if (adapter->vf_ifp) {
5048                 e1000_rlpml_set_vf(hw,
5049                     adapter->max_frame_size + VLAN_TAG_SIZE);
5050                 return;
5051         }
5052
5053         reg = E1000_READ_REG(hw, E1000_CTRL);
5054         reg |= E1000_CTRL_VME;
5055         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5056
5057         /* Enable the Filter Table */
5058         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5059                 reg = E1000_READ_REG(hw, E1000_RCTL);
5060                 reg &= ~E1000_RCTL_CFIEN;
5061                 reg |= E1000_RCTL_VFE;
5062                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5063         }
5064
5065         /* Update the frame size */
5066         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5067             adapter->max_frame_size + VLAN_TAG_SIZE);
5068
5069         /* Don't bother with table if no vlans */
5070         if ((adapter->num_vlans == 0) ||
5071             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5072                 return;
5073         /*
5074         ** A soft reset zeroes out the VFTA, so
5075         ** we need to repopulate it now.
5076         */
5077         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5078                 if (adapter->shadow_vfta[i] != 0) {
5079                         if (adapter->vf_ifp)
5080                                 e1000_vfta_set_vf(hw,
5081                                     adapter->shadow_vfta[i], TRUE);
5082                         else
5083                                 e1000_write_vfta(hw,
5084                                     i, adapter->shadow_vfta[i]);
5085                 }
5086 }
5087
5088 static void
5089 igb_enable_intr(struct adapter *adapter)
5090 {
5091         /* With RSS set up what to auto clear */
5092         if (adapter->msix_mem) {
5093                 u32 mask = (adapter->que_mask | adapter->link_mask);
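                /*
                 * EIAC selects which extended causes auto-clear, EIAM which
                 * ones auto-mask, and EIMS enables them; LSC (link status
                 * change) is still delivered through the legacy IMS path.
                 */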
5094                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5095                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5096                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5097                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5098                     E1000_IMS_LSC);
5099         } else {
5100                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5101                     IMS_ENABLE_MASK);
5102         }
5103         E1000_WRITE_FLUSH(&adapter->hw);
5104
5105         return;
5106 }
5107
5108 static void
5109 igb_disable_intr(struct adapter *adapter)
5110 {
5111         if (adapter->msix_mem) {
5112                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5113                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5114         } 
5115         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5116         E1000_WRITE_FLUSH(&adapter->hw);
5117         return;
5118 }
5119
5120 /*
5121  * Bit of a misnomer: what this really does is
5122  * enable OS management of the system, i.e., it
5123  * disables the special hardware management features.
5124  */
5125 static void
5126 igb_init_manageability(struct adapter *adapter)
5127 {
5128         if (adapter->has_manage) {
5129                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5130                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5131
5132                 /* disable hardware interception of ARP */
5133                 manc &= ~(E1000_MANC_ARP_EN);
5134
5135                 /* enable receiving management packets to the host */
5136                 manc |= E1000_MANC_EN_MNG2HOST;
5137                 manc2h |= 1 << 5;  /* Mng Port 623 */
5138                 manc2h |= 1 << 6;  /* Mng Port 664 */
5139                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5140                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5141         }
5142 }
5143
5144 /*
5145  * Give control back to hardware management
5146  * controller if there is one.
5147  */
5148 static void
5149 igb_release_manageability(struct adapter *adapter)
5150 {
5151         if (adapter->has_manage) {
5152                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5153
5154                 /* re-enable hardware interception of ARP */
5155                 manc |= E1000_MANC_ARP_EN;
5156                 manc &= ~E1000_MANC_EN_MNG2HOST;
5157
5158                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5159         }
5160 }
5161
5162 /*
5163  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5164  * For ASF and Pass Through versions of f/w this means that
5165  * the driver is loaded. 
5166  *
5167  */
5168 static void
5169 igb_get_hw_control(struct adapter *adapter)
5170 {
5171         u32 ctrl_ext;
5172
5173         if (adapter->vf_ifp)
5174                 return;
5175
5176         /* Let firmware know the driver has taken over */
5177         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5178         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5179             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5180 }
5181
5182 /*
5183  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5184  * For ASF and Pass Through versions of f/w this means that the
5185  * driver is no longer loaded.
5186  *
5187  */
5188 static void
5189 igb_release_hw_control(struct adapter *adapter)
5190 {
5191         u32 ctrl_ext;
5192
5193         if (adapter->vf_ifp)
5194                 return;
5195
5196         /* Let firmware take over control of h/w */
5197         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5198         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5199             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5200 }
5201
5202 static int
5203 igb_is_valid_ether_addr(uint8_t *addr)
5204 {
5205         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5206
5207         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5208                 return (FALSE);
5209         }
5210
5211         return (TRUE);
5212 }
5213
5214
5215 /*
5216  * Enable PCI Wake On Lan capability
5217  */
5218 static void
5219 igb_enable_wakeup(device_t dev)
5220 {
5221         u16     cap, status;
5222         u8      id;
5223
5224         /* First find the capabilities pointer*/
5225         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5226         /* Read the PM Capabilities */
5227         id = pci_read_config(dev, cap, 1);
5228         if (id != PCIY_PMG)     /* Something wrong */
5229                 return;
5230         /* OK, we have the power capabilities, so
5231            now get the status register */
5232         cap += PCIR_POWER_STATUS;
5233         status = pci_read_config(dev, cap, 2);
5234         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5235         pci_write_config(dev, cap, status, 2);
5236         return;
5237 }
5238
5239 static void
5240 igb_led_func(void *arg, int onoff)
5241 {
5242         struct adapter  *adapter = arg;
5243
5244         IGB_CORE_LOCK(adapter);
5245         if (onoff) {
5246                 e1000_setup_led(&adapter->hw);
5247                 e1000_led_on(&adapter->hw);
5248         } else {
5249                 e1000_led_off(&adapter->hw);
5250                 e1000_cleanup_led(&adapter->hw);
5251         }
5252         IGB_CORE_UNLOCK(adapter);
5253 }
5254
5255 /**********************************************************************
5256  *
5257  *  Update the board statistics counters.
5258  *
5259  **********************************************************************/
5260 static void
5261 igb_update_stats_counters(struct adapter *adapter)
5262 {
5263         struct ifnet            *ifp;
5264         struct e1000_hw         *hw = &adapter->hw;
5265         struct e1000_hw_stats   *stats;
5266
5267         /* 
5268         ** The virtual function adapter has only a
5269         ** small controlled set of stats, do only 
5270         ** those and return.
5271         */
5272         if (adapter->vf_ifp) {
5273                 igb_update_vf_stats_counters(adapter);
5274                 return;
5275         }
5276
5277         stats = (struct e1000_hw_stats  *)adapter->stats;
5278
5279         if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5280            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5281                 stats->symerrs +=
5282                     E1000_READ_REG(hw,E1000_SYMERRS);
5283                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5284         }
5285
5286         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5287         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5288         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5289         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5290
5291         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5292         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5293         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5294         stats->dc += E1000_READ_REG(hw, E1000_DC);
5295         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5296         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5297         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5298         /*
5299         ** For watchdog management we need to know if we have been
5300         ** paused during the last interval, so capture that here.
5301         */ 
5302         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5303         stats->xoffrxc += adapter->pause_frames;
5304         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5305         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5306         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5307         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5308         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5309         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5310         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5311         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5312         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5313         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5314         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5315         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5316
5317         /* For the 64-bit byte counters the low dword must be read first. */
5318         /* Both registers clear on the read of the high dword */
5319
5320         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5321             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5322         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5323             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5324
5325         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5326         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5327         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5328         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5329         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5330
5331         stats->tor += E1000_READ_REG(hw, E1000_TORH);
5332         stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5333
5334         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5335         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5336         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5337         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5338         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5339         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5340         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5341         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5342         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5343         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5344
5345         /* Interrupt Counts */
5346
5347         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5348         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5349         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5350         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5351         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5352         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5353         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5354         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5355         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5356
5357         /* Host to Card Statistics */
5358
5359         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5360         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5361         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5362         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5363         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5364         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5365         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5366         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5367             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5368         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5369             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5370         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5371         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5372         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5373
5374         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5375         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5376         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5377         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5378         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5379         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5380
5381         ifp = adapter->ifp;
5382         ifp->if_collisions = stats->colc;
5383
5384         /* Rx Errors */
5385         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5386             stats->crcerrs + stats->algnerrc +
5387             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5388
5389         /* Tx Errors */
5390         ifp->if_oerrors = stats->ecol +
5391             stats->latecol + adapter->watchdog_events;
5392
5393         /* Driver specific counters */
5394         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5395         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5396         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5397         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5398         adapter->packet_buf_alloc_tx =
5399             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5400         adapter->packet_buf_alloc_rx =
5401             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5402 }
5403
5404
5405 /**********************************************************************
5406  *
5407  *  Initialize the VF board statistics counters.
5408  *
5409  **********************************************************************/
5410 static void
5411 igb_vf_init_stats(struct adapter *adapter)
5412 {
5413         struct e1000_hw *hw = &adapter->hw;
5414         struct e1000_vf_stats   *stats;
5415
5416         stats = (struct e1000_vf_stats  *)adapter->stats;
5417         if (stats == NULL)
5418                 return;
5419         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5420         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5421         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5422         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5423         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5424 }
5425  
5426 /**********************************************************************
5427  *
5428  *  Update the VF board statistics counters.
5429  *
5430  **********************************************************************/
5431 static void
5432 igb_update_vf_stats_counters(struct adapter *adapter)
5433 {
5434         struct e1000_hw *hw = &adapter->hw;
5435         struct e1000_vf_stats   *stats;
5436
5437         if (adapter->link_speed == 0)
5438                 return;
5439
5440         stats = (struct e1000_vf_stats  *)adapter->stats;
5441
5442         UPDATE_VF_REG(E1000_VFGPRC,
5443             stats->last_gprc, stats->gprc);
5444         UPDATE_VF_REG(E1000_VFGORC,
5445             stats->last_gorc, stats->gorc);
5446         UPDATE_VF_REG(E1000_VFGPTC,
5447             stats->last_gptc, stats->gptc);
5448         UPDATE_VF_REG(E1000_VFGOTC,
5449             stats->last_gotc, stats->gotc);
5450         UPDATE_VF_REG(E1000_VFMPRC,
5451             stats->last_mprc, stats->mprc);
5452 }
5453
5454 /* Export a single 32-bit register via a read-only sysctl. */
5455 static int
5456 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5457 {
5458         struct adapter *adapter;
5459         u_int val;
5460
5461         adapter = oidp->oid_arg1;
5462         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5463         return (sysctl_handle_int(oidp, &val, 0, req));
5464 }
5465
5466 /*
5467 **  Tuneable interrupt rate handler
5468 */
5469 static int
5470 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5471 {
5472         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5473         int                     error;
5474         u32                     reg, usec, rate;
5475                         
5476         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5477         usec = ((reg & 0x7FFC) >> 2);
5478         if (usec > 0)
5479                 rate = 1000000 / usec;
5480         else
5481                 rate = 0;
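        /*
         * Worked example of the conversion above (the code treats the
         * EITR interval field, bits 14:2, as microseconds): an interval
         * of 125 usec yields a reported rate of 1000000 / 125 = 8000
         * interrupts per second.
         */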
5482         error = sysctl_handle_int(oidp, &rate, 0, req);
5483         if (error || !req->newptr)
5484                 return (error);
5485         return (0);
5486 }
5487
5488 /*
5489  * Add sysctl variables, one per statistic, to the system.
5490  */
5491 static void
5492 igb_add_hw_stats(struct adapter *adapter)
5493 {
5494         device_t dev = adapter->dev;
5495
5496         struct tx_ring *txr = adapter->tx_rings;
5497         struct rx_ring *rxr = adapter->rx_rings;
5498
5499         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5500         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5501         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5502         struct e1000_hw_stats *stats = adapter->stats;
5503
5504         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5505         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5506
5507 #define QUEUE_NAME_LEN 32
5508         char namebuf[QUEUE_NAME_LEN];
5509
5510         /* Driver Statistics */
5511         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5512                         CTLFLAG_RD, &adapter->link_irq, 0,
5513                         "Link MSIX IRQ Handled");
5514         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5515                         CTLFLAG_RD, &adapter->dropped_pkts,
5516                         "Driver dropped packets");
5517         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5518                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5519                         "Driver tx dma failure in xmit");
5520         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5521                         CTLFLAG_RD, &adapter->rx_overruns,
5522                         "RX overruns");
5523         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5524                         CTLFLAG_RD, &adapter->watchdog_events,
5525                         "Watchdog timeouts");
5526
5527         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5528                         CTLFLAG_RD, &adapter->device_control,
5529                         "Device Control Register");
5530         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5531                         CTLFLAG_RD, &adapter->rx_control,
5532                         "Receiver Control Register");
5533         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5534                         CTLFLAG_RD, &adapter->int_mask,
5535                         "Interrupt Mask");
5536         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5537                         CTLFLAG_RD, &adapter->eint_mask,
5538                         "Extended Interrupt Mask");
5539         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5540                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5541                         "Transmit Buffer Packet Allocation");
5542         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5543                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5544                         "Receive Buffer Packet Allocation");
5545         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5546                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5547                         "Flow Control High Watermark");
5548         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5549                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5550                         "Flow Control Low Watermark");
5551
5552         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5553                 struct lro_ctrl *lro = &rxr->lro;
5554
5555                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5556                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5557                                             CTLFLAG_RD, NULL, "Queue Name");
5558                 queue_list = SYSCTL_CHILDREN(queue_node);
5559
5560                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5561                                 CTLFLAG_RD, &adapter->queues[i],
5562                                 sizeof(&adapter->queues[i]),
5563                                 igb_sysctl_interrupt_rate_handler,
5564                                 "IU", "Interrupt Rate");
5565
5566                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5567                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5568                                 igb_sysctl_reg_handler, "IU",
5569                                 "Transmit Descriptor Head");
5570                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5571                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5572                                 igb_sysctl_reg_handler, "IU",
5573                                 "Transmit Descriptor Tail");
5574                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5575                                 CTLFLAG_RD, &txr->no_desc_avail,
5576                                 "Queue No Descriptor Available");
5577                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5578                                 CTLFLAG_RD, &txr->tx_packets,
5579                                 "Queue Packets Transmitted");
5580
5581                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5582                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5583                                 igb_sysctl_reg_handler, "IU",
5584                                 "Receive Descriptor Head");
5585                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5586                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5587                                 igb_sysctl_reg_handler, "IU",
5588                                 "Receive Descriptor Tail");
5589                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5590                                 CTLFLAG_RD, &rxr->rx_packets,
5591                                 "Queue Packets Received");
5592                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5593                                 CTLFLAG_RD, &rxr->rx_bytes,
5594                                 "Queue Bytes Received");
5595                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5596                                 CTLFLAG_RD, &lro->lro_queued, 0,
5597                                 "LRO Queued");
5598                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5599                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5600                                 "LRO Flushed");
5601         }
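
        /*
        ** The per-queue nodes land under the device's sysctl tree, so
        ** the counters can be inspected with, for example (unit 0,
        ** queue 0):
        **
        **      # sysctl dev.igb.0.queue0.rx_packets
        **      # sysctl dev.igb.0.queue0.interrupt_rate
        */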
5602
5603         /* MAC stats get their own sub node */
5604
5605         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5606                                     CTLFLAG_RD, NULL, "MAC Statistics");
5607         stat_list = SYSCTL_CHILDREN(stat_node);
5608
5609         /*
5610         ** VF adapter has a very limited set of stats
5611         ** since it's not managing the metal, so to speak.
5612         */
5613         if (adapter->vf_ifp) {
5614                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5615                                 CTLFLAG_RD, &stats->gprc,
5616                                 "Good Packets Received");
5617                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5618                                 CTLFLAG_RD, &stats->gptc,
5619                                 "Good Packets Transmitted");
5620                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5621                                 CTLFLAG_RD, &stats->gorc,
5622                                 "Good Octets Received");
5623                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5624                                 CTLFLAG_RD, &stats->gotc,
5625                                 "Good Octets Transmitted");
5626                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5627                                 CTLFLAG_RD, &stats->mprc,
5628                                 "Multicast Packets Received");
5629                 return;
5630         }
5631
5632         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5633                         CTLFLAG_RD, &stats->ecol,
5634                         "Excessive collisions");
5635         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5636                         CTLFLAG_RD, &stats->scc,
5637                         "Single collisions");
5638         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5639                         CTLFLAG_RD, &stats->mcc,
5640                         "Multiple collisions");
5641         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5642                         CTLFLAG_RD, &stats->latecol,
5643                         "Late collisions");
5644         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5645                         CTLFLAG_RD, &stats->colc,
5646                         "Collision Count");
5647         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5648                         CTLFLAG_RD, &stats->symerrs,
5649                         "Symbol Errors");
5650         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5651                         CTLFLAG_RD, &stats->sec,
5652                         "Sequence Errors");
5653         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5654                         CTLFLAG_RD, &stats->dc,
5655                         "Defer Count");
5656         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5657                         CTLFLAG_RD, &stats->mpc,
5658                         "Missed Packets");
5659         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5660                         CTLFLAG_RD, &stats->rnbc,
5661                         "Receive No Buffers");
5662         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5663                         CTLFLAG_RD, &stats->ruc,
5664                         "Receive Undersize");
5665         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5666                         CTLFLAG_RD, &stats->rfc,
5667                         "Fragmented Packets Received ");
5668         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5669                         CTLFLAG_RD, &stats->roc,
5670                         "Oversized Packets Received");
5671         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5672                         CTLFLAG_RD, &stats->rjc,
5673                         "Recevied Jabber");
5674         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5675                         CTLFLAG_RD, &stats->rxerrc,
5676                         "Receive Errors");
5677         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5678                         CTLFLAG_RD, &stats->crcerrs,
5679                         "CRC errors");
5680         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5681                         CTLFLAG_RD, &stats->algnerrc,
5682                         "Alignment Errors");
5683         /* On 82575 these are collision counts */
5684         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5685                         CTLFLAG_RD, &stats->cexterr,
5686                         "Collision/Carrier extension errors");
5687         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5688                         CTLFLAG_RD, &stats->xonrxc,
5689                         "XON Received");
5690         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5691                         CTLFLAG_RD, &stats->xontxc,
5692                         "XON Transmitted");
5693         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5694                         CTLFLAG_RD, &stats->xoffrxc,
5695                         "XOFF Received");
5696         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5697                         CTLFLAG_RD, &stats->xofftxc,
5698                         "XOFF Transmitted");
5699         /* Packet Reception Stats */
5700         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5701                         CTLFLAG_RD, &stats->tpr,
5702                         "Total Packets Received ");
5703         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5704                         CTLFLAG_RD, &stats->gprc,
5705                         "Good Packets Received");
5706         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5707                         CTLFLAG_RD, &stats->bprc,
5708                         "Broadcast Packets Received");
5709         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5710                         CTLFLAG_RD, &stats->mprc,
5711                         "Multicast Packets Received");
5712         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5713                         CTLFLAG_RD, &stats->prc64,
5714                         "64 byte frames received ");
5715         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5716                         CTLFLAG_RD, &stats->prc127,
5717                         "65-127 byte frames received");
5718         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5719                         CTLFLAG_RD, &stats->prc255,
5720                         "128-255 byte frames received");
5721         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5722                         CTLFLAG_RD, &stats->prc511,
5723                         "256-511 byte frames received");
5724         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5725                         CTLFLAG_RD, &stats->prc1023,
5726                         "512-1023 byte frames received");
5727         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5728                         CTLFLAG_RD, &stats->prc1522,
5729                         "1023-1522 byte frames received");
5730         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5731                         CTLFLAG_RD, &stats->gorc, 
5732                         "Good Octets Received"); 
5733
5734         /* Packet Transmission Stats */
5735         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5736                         CTLFLAG_RD, &stats->gotc, 
5737                         "Good Octets Transmitted"); 
5738         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5739                         CTLFLAG_RD, &stats->tpt,
5740                         "Total Packets Transmitted");
5741         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5742                         CTLFLAG_RD, &stats->gptc,
5743                         "Good Packets Transmitted");
5744         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5745                         CTLFLAG_RD, &stats->bptc,
5746                         "Broadcast Packets Transmitted");
5747         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5748                         CTLFLAG_RD, &stats->mptc,
5749                         "Multicast Packets Transmitted");
5750         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5751                         CTLFLAG_RD, &stats->ptc64,
5752                         "64 byte frames transmitted ");
5753         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5754                         CTLFLAG_RD, &stats->ptc127,
5755                         "65-127 byte frames transmitted");
5756         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5757                         CTLFLAG_RD, &stats->ptc255,
5758                         "128-255 byte frames transmitted");
5759         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5760                         CTLFLAG_RD, &stats->ptc511,
5761                         "256-511 byte frames transmitted");
5762         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5763                         CTLFLAG_RD, &stats->ptc1023,
5764                         "512-1023 byte frames transmitted");
5765         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5766                         CTLFLAG_RD, &stats->ptc1522,
5767                         "1024-1522 byte frames transmitted");
5768         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5769                         CTLFLAG_RD, &stats->tsctc,
5770                         "TSO Contexts Transmitted");
5771         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5772                         CTLFLAG_RD, &stats->tsctfc,
5773                         "TSO Contexts Failed");
5774
5775
5776         /* Interrupt Stats */
5777
5778         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5779                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5780         int_list = SYSCTL_CHILDREN(int_node);
5781
5782         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5783                         CTLFLAG_RD, &stats->iac,
5784                         "Interrupt Assertion Count");
5785
5786         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5787                         CTLFLAG_RD, &stats->icrxptc,
5788                         "Interrupt Cause Rx Pkt Timer Expire Count");
5789
5790         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5791                         CTLFLAG_RD, &stats->icrxatc,
5792                         "Interrupt Cause Rx Abs Timer Expire Count");
5793
5794         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5795                         CTLFLAG_RD, &stats->ictxptc,
5796                         "Interrupt Cause Tx Pkt Timer Expire Count");
5797
5798         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5799                         CTLFLAG_RD, &stats->ictxatc,
5800                         "Interrupt Cause Tx Abs Timer Expire Count");
5801
5802         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5803                         CTLFLAG_RD, &stats->ictxqec,
5804                         "Interrupt Cause Tx Queue Empty Count");
5805
5806         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5807                         CTLFLAG_RD, &stats->ictxqmtc,
5808                         "Interrupt Cause Tx Queue Min Thresh Count");
5809
5810         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5811                         CTLFLAG_RD, &stats->icrxdmtc,
5812                         "Interrupt Cause Rx Desc Min Thresh Count");
5813
5814         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5815                         CTLFLAG_RD, &stats->icrxoc,
5816                         "Interrupt Cause Receiver Overrun Count");
5817
5818         /* Host to Card Stats */
5819
5820         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5821                                     CTLFLAG_RD, NULL, 
5822                                     "Host to Card Statistics");
5823
5824         host_list = SYSCTL_CHILDREN(host_node);
5825
5826         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5827                         CTLFLAG_RD, &stats->cbtmpc,
5828                         "Circuit Breaker Tx Packet Count");
5829
5830         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5831                         CTLFLAG_RD, &stats->htdpmc,
5832                         "Host Transmit Discarded Packets");
5833
5834         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5835                         CTLFLAG_RD, &stats->rpthc,
5836                         "Rx Packets To Host");
5837
5838         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5839                         CTLFLAG_RD, &stats->cbrmpc,
5840                         "Circuit Breaker Rx Packet Count");
5841
5842         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5843                         CTLFLAG_RD, &stats->cbrdpc,
5844                         "Circuit Breaker Rx Dropped Count");
5845
5846         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5847                         CTLFLAG_RD, &stats->hgptc,
5848                         "Host Good Packets Tx Count");
5849
5850         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5851                         CTLFLAG_RD, &stats->htcbdpc,
5852                         "Host Tx Circuit Breaker Dropped Count");
5853
5854         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5855                         CTLFLAG_RD, &stats->hgorc,
5856                         "Host Good Octets Received Count");
5857
5858         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5859                         CTLFLAG_RD, &stats->hgotc,
5860                         "Host Good Octets Transmit Count");
5861
5862         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5863                         CTLFLAG_RD, &stats->lenerrs,
5864                         "Length Errors");
5865
5866         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5867                         CTLFLAG_RD, &stats->scvpc,
5868                         "SerDes/SGMII Code Violation Pkt Count");
5869
5870         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5871                         CTLFLAG_RD, &stats->hrmpc,
5872                         "Header Redirection Missed Packet Count");
5873 }
5874
5875
5876 /**********************************************************************
5877  *
5878  *  This routine provides a way to dump out the adapter EEPROM,
5879  *  often a useful debug/service tool. Only the first 32 words are
5880  *  dumped; the data that matters lies within that range.
5881  *
5882  **********************************************************************/
5883 static int
5884 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5885 {
5886         struct adapter *adapter;
5887         int error;
5888         int result;
5889
5890         result = -1;
5891         error = sysctl_handle_int(oidp, &result, 0, req);
5892
5893         if (error || !req->newptr)
5894                 return (error);
5895
5896         /*
5897          * This value will cause a hex dump of the
5898          * first 32 16-bit words of the EEPROM to
5899          * the screen.
5900          */
5901         if (result == 1) {
5902                 adapter = (struct adapter *)arg1;
5903                 igb_print_nvm_info(adapter);
5904         }
5905
5906         return (error);
5907 }
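
/*
** Usage sketch: this handler is attached to a device sysctl elsewhere in
** the driver (assumed below to be named "nvm"); writing 1 to it triggers
** the dump, e.g.:
**
**      # sysctl dev.igb.0.nvm=1
*/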
5908
5909 static void
5910 igb_print_nvm_info(struct adapter *adapter)
5911 {
5912         u16     eeprom_data;
5913         int     i, j, row = 0;
5914
5915         /* It's a bit crude, but it gets the job done */
5916         printf("\nInterface EEPROM Dump:\n");
5917         printf("Offset\n0x0000  ");
5918         for (i = 0, j = 0; i < 32; i++, j++) {
5919                 if (j == 8) { /* Make the offset block */
5920                         j = 0; ++row;
5921                         printf("\n0x00%x0  ",row);
5922                 }
5923                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5924                 printf("%04x ", eeprom_data);
5925         }
5926         printf("\n");
5927 }
5928
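/*
** Helper: seed *limit with a default value and expose it as a
** read/write integer sysctl under the device's tree.
*/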
5929 static void
5930 igb_set_sysctl_value(struct adapter *adapter, const char *name,
5931         const char *description, int *limit, int value)
5932 {
5933         *limit = value;
5934         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5935             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5936             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5937 }
5938
5939 /*
5940 ** Set flow control using sysctl:
5941 ** Flow control values:
5942 **      0 - off
5943 **      1 - rx pause
5944 **      2 - tx pause
5945 **      3 - full
5946 */
5947 static int
5948 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5949 {
5950         int             error;
5951         static int      input = 3; /* default is full */
5952         struct adapter  *adapter = (struct adapter *) arg1;
5953
5954         error = sysctl_handle_int(oidp, &input, 0, req);
5955
5956         if ((error) || (req->newptr == NULL))
5957                 return (error);
5958
5959         switch (input) {
5960                 case e1000_fc_rx_pause:
5961                 case e1000_fc_tx_pause:
5962                 case e1000_fc_full:
5963                 case e1000_fc_none:
5964                         adapter->hw.fc.requested_mode = input;
5965                         adapter->fc = input;
5966                         break;
5967                 default:
5968                         /* Do nothing */
5969                         return (error);
5970         }
5971
5972         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5973         e1000_force_mac_fc(&adapter->hw);
5974         return (error);
5975 }
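
/*
** Usage sketch (the OID name is assumed to be "fc" on the device node):
**
**      # sysctl dev.igb.0.fc=3
**
** The value maps onto the shared-code enum: e1000_fc_none (0),
** e1000_fc_rx_pause (1), e1000_fc_tx_pause (2), e1000_fc_full (3);
** anything else is ignored.
*/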
5976
5977 /*
5978 ** Manage DMA Coalesce:
5979 ** Control values:
5980 **      0/1 - off/on
5981 **      Legal timer values are:
5982 **      250, 500, and 1000-10000 in increments of 1000
5983 */
5984 static int
5985 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5986 {
5987         struct adapter *adapter = (struct adapter *) arg1;
5988         int             error;
5989
5990         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5991
5992         if ((error) || (req->newptr == NULL))
5993                 return (error);
5994
5995         switch (adapter->dmac) {
5996                 case 0:
5997                         /* Disabling */
5998                         break;
5999                 case 1: /* Just enable and use default */
6000                         adapter->dmac = 1000;
6001                         break;
6002                 case 250:
6003                 case 500:
6004                 case 1000:
6005                 case 2000:
6006                 case 3000:
6007                 case 4000:
6008                 case 5000:
6009                 case 6000:
6010                 case 7000:
6011                 case 8000:
6012                 case 9000:
6013                 case 10000:
6014                         /* Legal values - allow */
6015                         break;
6016                 default:
6017                         /* Do nothing, illegal value */
6018                         adapter->dmac = 0;
6019                         return (error);
6020         }
6021         /* Reinit the interface */
6022         igb_init(adapter);
6023         return (error);
6024 }
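
/*
** Usage sketch (the OID name is assumed to be "dmac"):
**
**      # sysctl dev.igb.0.dmac=1000
**
** Writing 1 selects the 1000 default, 0 disables coalescing, and any
** value outside the legal set is dropped back to 0; accepted values
** cause the interface to be re-initialized.
*/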
6025
6026 /*
6027 ** Manage Energy Efficient Ethernet:
6028 ** Control values:
6029 **     0 - EEE enabled, 1 - EEE disabled
6030 */
6031 static int
6032 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6033 {
6034         struct adapter  *adapter = (struct adapter *) arg1;
6035         int             error, value;
6036
6037         value = adapter->hw.dev_spec._82575.eee_disable;
6038         error = sysctl_handle_int(oidp, &value, 0, req);
6039         if (error || req->newptr == NULL)
6040                 return (error);
6041         IGB_CORE_LOCK(adapter);
6042         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6043         igb_init_locked(adapter);
6044         IGB_CORE_UNLOCK(adapter);
6045         return (0);
6046 }
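
/*
** Usage sketch (the OID name is assumed to be "eee_disabled"):
**
**      # sysctl dev.igb.0.eee_disabled=1
**
** Any non-zero value disables EEE; the interface is re-initialized
** under the core lock so the new setting takes effect.
*/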