/******************************************************************************

  Copyright (c) 2001-2012, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.3.4";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_IT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I210_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_I211_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};
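
/*
** Generally, support for a new adapter in this family is a matter of
** adding its device ID row to the table above; the E1000_DEV_ID_*
** values come from the shared e1000 code (e1000_hw.h).
*/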

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
#if __FreeBSD_version >= 800000
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     igb_qflush(struct ifnet *);
static void     igb_deferred_mq_start(void *, int);
#else
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static int      igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct igb_queue *, int, int *);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static bool     igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, struct tcphdr *);
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static void     igb_refresh_mbufs(struct rx_ring *, int);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void     igb_vf_init_stats(struct adapter *);
static void     igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int      igb_irq_fast(void *);
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);
static void     igb_handle_link_locked(struct adapter *);

static void     igb_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */
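
/*
** Note: DEVICE_POLLING requires "options DEVICE_POLLING" in the kernel
** configuration; once built in, polling is switched on per interface
** at runtime, e.g.:
**
**      ifconfig igb0 polling
*/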

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        {0, 0}
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

/*
** AIM: Adaptive Interrupt Moderation.
** The interrupt rate is varied over time, based on
** the traffic seen on each interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate.
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

/*
** Header split causes the packet header to be DMA'd
** into a separate mbuf from the payload. This can have
** memory alignment benefits, and small packets often fit
** entirely in the header mbuf, using no cluster at all.
** Whether it helps is very workload dependent.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments
** when a queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        if (resource_disabled("igb", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_nvm_info, "I", "NVM Information");

        igb_set_sysctl_value(adapter, "enable_aim",
            "Interrupt Moderation", &adapter->enable_aim,
            igb_enable_aim);

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, igb_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        igb_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  The
         * count must not exceed the hardware maximum, and each ring's
         * size must be a multiple of IGB_DBA_ALIGN.
         */
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard Ethernet-sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate the appropriate stats memory */
        if (adapter->vf_ifp) {
                adapter->stats =
                    (struct e1000_vf_stats *)malloc(
                    sizeof(struct e1000_vf_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
                igb_vf_init_stats(adapter);
        } else
                adapter->stats =
                    (struct e1000_hw_stats *)malloc(
                    sizeof(struct e1000_hw_stats), M_DEVBUF,
                    M_NOWAIT | M_ZERO);
        if (adapter->stats == NULL) {
                device_printf(dev, "Cannot allocate stats memory\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Cannot allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Some adapter-specific advanced features */
        if (adapter->hw.mac.type >= e1000_i350) {
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
                SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
                    adapter, 0, igb_sysctl_eee, "I",
                    "Disable Energy Efficient Ethernet");
                if (adapter->hw.phy.media_type == e1000_media_type_copper)
                        e1000_set_eee_i350(&adapter->hw);
        }

        /*
        ** Start from a known state: this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (((adapter->hw.mac.type != e1000_i210) &&
            (adapter->hw.mac.type != e1000_i211)) &&
            (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Setup OS specific network interface */
        if (igb_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-LAN
         */
        /* The APME bit in the EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        igb_add_hw_stats(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(igb_led_func, adapter,
            device_get_nameunit(dev));

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

#ifdef DEV_NETMAP
        igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_detach(dev);
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        free(adapter->mta, M_DEVBUF);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

        ether_ifdetach(adapter->ifp);

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        if (adapter->mta != NULL)
                free(adapter->mta, M_DEVBUF);

        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        IGB_TX_LOCK(txr);
#if __FreeBSD_version >= 800000
                        /* Process the stack queue only if not depleted */
                        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
                            !drbr_empty(ifp, txr->br))
                                igb_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                igb_start_locked(txr, ifp);
#endif
                        IGB_TX_UNLOCK(txr);
                }
        }
        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#if __FreeBSD_version < 800000

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver remains in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        /* Call cleanup if number of TX descriptors low */
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IGB_MAX_SCATTER) {
                        txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        if (txr->tx_avail <= IGB_MAX_SCATTER)
                                txr->queue_status |= IGB_QUEUE_DEPLETED;
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status |= IGB_QUEUE_WORKING;
        }
}

/*
 * Legacy TX routine: called from the stack, it always uses
 * the first ring (tx_rings[0]) and spins for its lock.
 * It should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#else /* __FreeBSD_version >= 800000 */

/*
** Multiqueue Transmit driver:
** selects a TX ring by the mbuf's flowid (or by the current
** CPU when no flowid is set), then either transmits directly
** or defers the work to the per-queue taskqueue.
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter          *adapter = ifp->if_softc;
        struct igb_queue        *que;
        struct tx_ring          *txr;
        int                     i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
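
        /*
        ** Hashing on the flowid keeps all packets of one flow on the
        ** same ring, preserving per-flow ordering; the curcpu fallback
        ** simply spreads untagged traffic across the queues.
        */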

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];
        if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
            IGB_TX_TRYLOCK(txr)) {
                err = igb_mq_start_locked(ifp, txr, m);
                IGB_TX_UNLOCK(txr);
        } else {
                err = drbr_enqueue(ifp, txr->br, m);
                taskqueue_enqueue(que->tq, &txr->txq_task);
        }

        return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

        IGB_TX_LOCK_ASSERT(txr);

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            (txr->queue_status & IGB_QUEUE_DEPLETED) ||
            adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                next = drbr_dequeue(ifp, txr->br);
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status |= IGB_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
                igb_txeof(txr);
        if (txr->tx_avail <= IGB_MAX_SCATTER)
                txr->queue_status |= IGB_QUEUE_DEPLETED;
        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IGB_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr, NULL);
        IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                igb_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        igb_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: the stack calls it as the
 *  init entry point in the network interface structure, and the
 *  driver calls it as a hw/sw initialization routine to get the
 *  adapter to a consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest MAC address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo frames / packet split
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
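
        /*
        ** (2K clusters cover standard frames, page-sized clusters
        ** (4K on most platforms) cover frames up to 4K, and 9K
        ** clusters are used for full-sized jumbo frames)
        */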

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Enable VLAN support */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
                igb_setup_vlan_hw_support(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* This clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Set Energy Efficient Ethernet */
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                e1000_set_eee_i350(&adapter->hw);
}
1362
1363 static void
1364 igb_init(void *arg)
1365 {
1366         struct adapter *adapter = arg;
1367
1368         IGB_CORE_LOCK(adapter);
1369         igb_init_locked(adapter);
1370         IGB_CORE_UNLOCK(adapter);
1371 }
1372
1373
1374 static void
1375 igb_handle_que(void *context, int pending)
1376 {
1377         struct igb_queue *que = context;
1378         struct adapter *adapter = que->adapter;
1379         struct tx_ring *txr = que->txr;
1380         struct ifnet    *ifp = adapter->ifp;
1381
1382         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1383                 bool    more;
1384
1385                 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1386
1387                 IGB_TX_LOCK(txr);
1388                 igb_txeof(txr);
1389 #if __FreeBSD_version >= 800000
1390                 /* Process the stack queue only if not depleted */
1391                 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1392                     !drbr_empty(ifp, txr->br))
1393                         igb_mq_start_locked(ifp, txr, NULL);
1394 #else
1395                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1396                         igb_start_locked(txr, ifp);
1397 #endif
1398                 IGB_TX_UNLOCK(txr);
1399                 /* Do we need another? */
1400                 if (more) {
1401                         taskqueue_enqueue(que->tq, &que->que_task);
1402                         return;
1403                 }
1404         }
1405
1406 #ifdef DEVICE_POLLING
1407         if (ifp->if_capenable & IFCAP_POLLING)
1408                 return;
1409 #endif
1410         /* Reenable this interrupt */
1411         if (que->eims)
1412                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1413         else
1414                 igb_enable_intr(adapter);
1415 }
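/*
 * Editor's note: igb_handle_que() above is the standard taskqueue re-arm
 * pattern: process a bounded batch, re-enqueue the task while work remains,
 * and unmask the interrupt only once the queue is drained.  Reduced to a
 * skeleton (RX-only; the function name is hypothetical):
 */
#if 0   /* illustrative sketch -- not compiled */
static void
example_deferred_handler(void *context, int pending)
{
        struct igb_queue *que = context;

        /* A bounded batch keeps one busy queue from starving the rest. */
        if (igb_rxeof(que, que->adapter->rx_process_limit, NULL)) {
                /* More work pending: stay in taskqueue context. */
                taskqueue_enqueue(que->tq, &que->que_task);
                return;
        }
        /* Drained: re-enable this queue's (auto-masked) MSI-X vector. */
        E1000_WRITE_REG(&que->adapter->hw, E1000_EIMS, que->eims);
}
#endif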
1416
1417 /* Deal with link in a sleepable context */
1418 static void
1419 igb_handle_link(void *context, int pending)
1420 {
1421         struct adapter *adapter = context;
1422
1423         IGB_CORE_LOCK(adapter);
1424         igb_handle_link_locked(adapter);
1425         IGB_CORE_UNLOCK(adapter);
1426 }
1427
1428 static void
1429 igb_handle_link_locked(struct adapter *adapter)
1430 {
1431         struct tx_ring  *txr = adapter->tx_rings;
1432         struct ifnet *ifp = adapter->ifp;
1433
1434         IGB_CORE_LOCK_ASSERT(adapter);
1435         adapter->hw.mac.get_link_status = 1;
1436         igb_update_link_status(adapter);
1437         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1438                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1439                         IGB_TX_LOCK(txr);
1440 #if __FreeBSD_version >= 800000
1441                         /* Process the stack queue only if not depleted */
1442                         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1443                             !drbr_empty(ifp, txr->br))
1444                                 igb_mq_start_locked(ifp, txr, NULL);
1445 #else
1446                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1447                                 igb_start_locked(txr, ifp);
1448 #endif
1449                         IGB_TX_UNLOCK(txr);
1450                 }
1451         }
1452 }
1453
1454 /*********************************************************************
1455  *
1456  *  MSI/Legacy Deferred
1457  *  Interrupt Service routine  
1458  *
1459  *********************************************************************/
1460 static int
1461 igb_irq_fast(void *arg)
1462 {
1463         struct adapter          *adapter = arg;
1464         struct igb_queue        *que = adapter->queues;
1465         u32                     reg_icr;
1466
1467
1468         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1469
1470         /* Hot eject?  */
1471         if (reg_icr == 0xffffffff)
1472                 return FILTER_STRAY;
1473
1474         /* Definitely not our interrupt.  */
1475         if (reg_icr == 0x0)
1476                 return FILTER_STRAY;
1477
1478         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1479                 return FILTER_STRAY;
1480
1481         /*
1482          * Mask interrupts until the taskqueue is finished running.  This is
1483          * cheap, just assume that it is needed.  This also works around the
1484          * MSI message reordering errata on certain systems.
1485          */
1486         igb_disable_intr(adapter);
1487         taskqueue_enqueue(que->tq, &que->que_task);
1488
1489         /* Link status change */
1490         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1491                 taskqueue_enqueue(que->tq, &adapter->link_task);
1492
1493         if (reg_icr & E1000_ICR_RXO)
1494                 adapter->rx_overruns++;
1495         return FILTER_HANDLED;
1496 }
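/*
 * Editor's note: igb_irq_fast() is a filter-type handler, so it runs in
 * interrupt context and may only ack hardware state and schedule deferred
 * work.  The return-value contract it follows, in sketch form (hypothetical
 * name; FILTER_STRAY/FILTER_HANDLED are the stock filter return codes):
 */
#if 0   /* illustrative sketch -- not compiled */
static int
example_fast_filter(void *arg)
{
        struct adapter *adapter = arg;
        u32 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

        /* 0 means a shared line fired for someone else; ~0 a hot eject. */
        if (icr == 0 || icr == 0xffffffff)
                return (FILTER_STRAY);

        igb_disable_intr(adapter);      /* mask until the task completes */
        taskqueue_enqueue(adapter->queues->tq, &adapter->queues->que_task);
        return (FILTER_HANDLED);        /* the interrupt was ours */
}
#endif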
1497
1498 #ifdef DEVICE_POLLING
1499 /*********************************************************************
1500  *
1501  *  Legacy polling routine: if using this code you MUST be sure that
1502  *  multiqueue is not defined, i.e., set igb_num_queues to 1.
1503  *
1504  *********************************************************************/
1505 #if __FreeBSD_version >= 800000
1506 #define POLL_RETURN_COUNT(a) (a)
1507 static int
1508 #else
1509 #define POLL_RETURN_COUNT(a)
1510 static void
1511 #endif
1512 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1513 {
1514         struct adapter          *adapter = ifp->if_softc;
1515         struct igb_queue        *que = adapter->queues;
1516         struct tx_ring          *txr = adapter->tx_rings;
1517         u32                     reg_icr, rx_done = 0;
1518         u32                     loop = IGB_MAX_LOOP;
1519         bool                    more;
1520
1521         IGB_CORE_LOCK(adapter);
1522         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1523                 IGB_CORE_UNLOCK(adapter);
1524                 return POLL_RETURN_COUNT(rx_done);
1525         }
1526
1527         if (cmd == POLL_AND_CHECK_STATUS) {
1528                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1529                 /* Link status change */
1530                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1531                         igb_handle_link_locked(adapter);
1532
1533                 if (reg_icr & E1000_ICR_RXO)
1534                         adapter->rx_overruns++;
1535         }
1536         IGB_CORE_UNLOCK(adapter);
1537
1538         igb_rxeof(que, count, &rx_done);
1539
1540         IGB_TX_LOCK(txr);
1541         do {
1542                 more = igb_txeof(txr);
1543         } while (loop-- && more);
1544 #if __FreeBSD_version >= 800000
1545         if (!drbr_empty(ifp, txr->br))
1546                 igb_mq_start_locked(ifp, txr, NULL);
1547 #else
1548         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1549                 igb_start_locked(txr, ifp);
1550 #endif
1551         IGB_TX_UNLOCK(txr);
1552         return POLL_RETURN_COUNT(rx_done);
1553 }
1554 #endif /* DEVICE_POLLING */
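/*
 * Editor's note: the POLL_RETURN_COUNT() dance above exists because the
 * polling handler signature changed in FreeBSD 8.0 from returning void to
 * returning the number of frames processed.  The compile-time switch,
 * isolated (hypothetical macro and function names):
 */
#if 0   /* illustrative sketch -- not compiled */
#if __FreeBSD_version >= 800000
#define EXAMPLE_POLL_RETURN(a)  (a)     /* 8.x and later: return rx count */
static int
#else
#define EXAMPLE_POLL_RETURN(a)          /* pre-8.0: handler returns void */
static void
#endif
example_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
        u32 rx_done = 0;

        /* ... process up to 'count' frames, accumulating rx_done ... */
        return EXAMPLE_POLL_RETURN(rx_done);
}
#endif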
1555
1556 /*********************************************************************
1557  *
1558  *  MSIX Que Interrupt Service routine
1559  *
1560  **********************************************************************/
1561 static void
1562 igb_msix_que(void *arg)
1563 {
1564         struct igb_queue *que = arg;
1565         struct adapter *adapter = que->adapter;
1566         struct ifnet   *ifp = adapter->ifp;
1567         struct tx_ring *txr = que->txr;
1568         struct rx_ring *rxr = que->rxr;
1569         u32             newitr = 0;
1570         bool            more_rx;
1571
1572         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1573         ++que->irqs;
1574
1575         IGB_TX_LOCK(txr);
1576         igb_txeof(txr);
1577 #if __FreeBSD_version >= 800000
1578         /* Process the stack queue only if not depleted */
1579         if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1580             !drbr_empty(ifp, txr->br))
1581                 igb_mq_start_locked(ifp, txr, NULL);
1582 #else
1583         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1584                 igb_start_locked(txr, ifp);
1585 #endif
1586         IGB_TX_UNLOCK(txr);
1587
1588         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1589
1590         if (adapter->enable_aim == FALSE)
1591                 goto no_calc;
1592         /*
1593         ** Do Adaptive Interrupt Moderation:
1594         **  - Write out last calculated setting
1595         **  - Calculate based on average size over
1596         **    the last interval.
1597         */
1598         if (que->eitr_setting)
1599                 E1000_WRITE_REG(&adapter->hw,
1600                     E1000_EITR(que->msix), que->eitr_setting);
1601  
1602         que->eitr_setting = 0;
1603
1604         /* Idle, do nothing */
1605         if ((txr->bytes == 0) && (rxr->bytes == 0))
1606                 goto no_calc;
1607                                 
1608         /* Use half the default if link speed is below gigabit */
1609         if (adapter->link_speed != 1000)
1610                 newitr = IGB_DEFAULT_ITR / 2;
1611         else {
1612                 if ((txr->bytes) && (txr->packets))
1613                         newitr = txr->bytes/txr->packets;
1614                 if ((rxr->bytes) && (rxr->packets))
1615                         newitr = max(newitr,
1616                             (rxr->bytes / rxr->packets));
1617                 newitr += 24; /* account for hardware frame, crc */
1618                 /* set an upper boundary */
1619                 newitr = min(newitr, 3000);
1620                 /* Be nice to the mid range */
1621                 if ((newitr > 300) && (newitr < 1200))
1622                         newitr = (newitr / 3);
1623                 else
1624                         newitr = (newitr / 2);
1625         }
1626         newitr &= 0x7FFC;  /* Mask invalid bits */
1627         if (adapter->hw.mac.type == e1000_82575)
1628                 newitr |= newitr << 16;
1629         else
1630                 newitr |= E1000_EITR_CNT_IGNR;
1631                  
1632         /* save for next interrupt */
1633         que->eitr_setting = newitr;
1634
1635         /* Reset state */
1636         txr->bytes = 0;
1637         txr->packets = 0;
1638         rxr->bytes = 0;
1639         rxr->packets = 0;
1640
1641 no_calc:
1642         /* Schedule a clean task if needed */
1643         if (more_rx)
1644                 taskqueue_enqueue(que->tq, &que->que_task);
1645         else
1646                 /* Reenable this interrupt */
1647                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1648         return;
1649 }
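/*
 * Editor's note: the adaptive-moderation block above derives the next EITR
 * interval from the average frame size seen since the last interrupt.  The
 * arithmetic, extracted as a pure function (hypothetical name; the constants
 * and the 82575 double-write quirk are as used above):
 */
#if 0   /* illustrative sketch -- not compiled */
static u32
example_aim_eitr(u32 tx_bytes, u32 tx_pkts, u32 rx_bytes, u32 rx_pkts,
    u16 link_speed, bool is_82575)
{
        u32 newitr = 0;

        if (link_speed != 1000)
                newitr = IGB_DEFAULT_ITR / 2;   /* half rate below gigabit */
        else {
                if (tx_bytes && tx_pkts)
                        newitr = tx_bytes / tx_pkts;    /* avg TX frame */
                if (rx_bytes && rx_pkts)
                        newitr = max(newitr, rx_bytes / rx_pkts);
                newitr += 24;                   /* framing overhead + CRC */
                newitr = min(newitr, 3000);     /* upper bound */
                /* be nice to the mid range (steeper divisor) */
                newitr /= (newitr > 300 && newitr < 1200) ? 3 : 2;
        }
        newitr &= 0x7FFC;                       /* mask invalid bits */
        return (is_82575 ? (newitr | (newitr << 16))
                         : (newitr | E1000_EITR_CNT_IGNR));
}
#endif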
1650
1651
1652 /*********************************************************************
1653  *
1654  *  MSIX Link Interrupt Service routine
1655  *
1656  **********************************************************************/
1657
1658 static void
1659 igb_msix_link(void *arg)
1660 {
1661         struct adapter  *adapter = arg;
1662         u32             icr;
1663
1664         ++adapter->link_irq;
1665         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1666         if (!(icr & E1000_ICR_LSC))
1667                 goto spurious;
1668         igb_handle_link(adapter, 0);
1669
1670 spurious:
1671         /* Rearm */
1672         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1673         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1674         return;
1675 }
1676
1677
1678 /*********************************************************************
1679  *
1680  *  Media Ioctl callback
1681  *
1682  *  This routine is called whenever the user queries the status of
1683  *  the interface using ifconfig.
1684  *
1685  **********************************************************************/
1686 static void
1687 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1688 {
1689         struct adapter *adapter = ifp->if_softc;
1690         u_char fiber_type = IFM_1000_SX;
1691
1692         INIT_DEBUGOUT("igb_media_status: begin");
1693
1694         IGB_CORE_LOCK(adapter);
1695         igb_update_link_status(adapter);
1696
1697         ifmr->ifm_status = IFM_AVALID;
1698         ifmr->ifm_active = IFM_ETHER;
1699
1700         if (!adapter->link_active) {
1701                 IGB_CORE_UNLOCK(adapter);
1702                 return;
1703         }
1704
1705         ifmr->ifm_status |= IFM_ACTIVE;
1706
1707         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1708             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1709                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1710         else {
1711                 switch (adapter->link_speed) {
1712                 case 10:
1713                         ifmr->ifm_active |= IFM_10_T;
1714                         break;
1715                 case 100:
1716                         ifmr->ifm_active |= IFM_100_TX;
1717                         break;
1718                 case 1000:
1719                         ifmr->ifm_active |= IFM_1000_T;
1720                         break;
1721                 }
1722                 if (adapter->link_duplex == FULL_DUPLEX)
1723                         ifmr->ifm_active |= IFM_FDX;
1724                 else
1725                         ifmr->ifm_active |= IFM_HDX;
1726         }
1727         IGB_CORE_UNLOCK(adapter);
1728 }
1729
1730 /*********************************************************************
1731  *
1732  *  Media Ioctl callback
1733  *
1734  *  This routine is called when the user changes speed/duplex using
1735  *  media/mediaopt options with ifconfig.
1736  *
1737  **********************************************************************/
1738 static int
1739 igb_media_change(struct ifnet *ifp)
1740 {
1741         struct adapter *adapter = ifp->if_softc;
1742         struct ifmedia  *ifm = &adapter->media;
1743
1744         INIT_DEBUGOUT("igb_media_change: begin");
1745
1746         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1747                 return (EINVAL);
1748
1749         IGB_CORE_LOCK(adapter);
1750         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1751         case IFM_AUTO:
1752                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1753                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1754                 break;
1755         case IFM_1000_LX:
1756         case IFM_1000_SX:
1757         case IFM_1000_T:
1758                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1760                 break;
1761         case IFM_100_TX:
1762                 adapter->hw.mac.autoneg = FALSE;
1763                 adapter->hw.phy.autoneg_advertised = 0;
1764                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1765                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1766                 else
1767                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1768                 break;
1769         case IFM_10_T:
1770                 adapter->hw.mac.autoneg = FALSE;
1771                 adapter->hw.phy.autoneg_advertised = 0;
1772                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1773                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1774                 else
1775                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1776                 break;
1777         default:
1778                 device_printf(adapter->dev, "Unsupported media type\n");
1779         }
1780
1781         igb_init_locked(adapter);
1782         IGB_CORE_UNLOCK(adapter);
1783
1784         return (0);
1785 }
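/*
 * Editor's note (worked example): a command such as
 *
 *      ifconfig igb0 media 100baseTX mediaopt full-duplex
 *
 * reaches igb_media_change() above as IFM_100_TX with IFM_FDX set in
 * ifm_media, so autonegotiation is turned off and forced_speed_duplex is
 * set to ADVERTISE_100_FULL; igb_init_locked() then reprograms the PHY.
 */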
1786
1787
1788 /*********************************************************************
1789  *
1790  *  This routine maps the mbufs to Advanced TX descriptors.
1791  *  
1792  **********************************************************************/
1793 static int
1794 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1795 {
1796         struct adapter          *adapter = txr->adapter;
1797         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1798         bus_dmamap_t            map;
1799         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1800         union e1000_adv_tx_desc *txd = NULL;
1801         struct mbuf             *m_head = *m_headp;
1802         struct ether_vlan_header *eh = NULL;
1803         struct ip               *ip = NULL;
1804         struct tcphdr           *th = NULL;
1805         u32                     hdrlen, cmd_type_len, olinfo_status = 0;
1806         int                     ehdrlen, poff;
1807         int                     nsegs, i, first, last = 0;
1808         int                     error, do_tso, remap = 1;
1809
1810         /* Set basic descriptor constants */
1811         cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1812         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1813         if (m_head->m_flags & M_VLANTAG)
1814                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1815
1816 retry:
1817         m_head = *m_headp;
1818         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1819         hdrlen = ehdrlen = poff = 0;
1820
1821         /*
1822          * Intel recommends entire IP/TCP header length reside in a single
1823          * buffer. If multiple descriptors are used to describe the IP and
1824          * TCP header, each descriptor should describe one or more
1825          * complete headers; descriptors referencing only parts of headers
1826          * are not supported. If all layer headers are not coalesced into
1827          * a single buffer, each buffer should not cross a 4KB boundary,
1828          * or be larger than the maximum read request size.
1829          * Controller also requires modifing IP/TCP header to make TSO work
1830          * The controller also requires modifying the IP/TCP header to
1831          * make TSO work, so we first get a writable mbuf chain, then
1832          * coalesce the ethernet/IP/TCP header into a single buffer to
1833          * meet the controller's requirement. This also simplifies
1834          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1835         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1836                 if (do_tso || (m_head->m_next != NULL && 
1837                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1838                         if (M_WRITABLE(*m_headp) == 0) {
1839                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1840                                 m_freem(*m_headp);
1841                                 if (m_head == NULL) {
1842                                         *m_headp = NULL;
1843                                         return (ENOBUFS);
1844                                 }
1845                                 *m_headp = m_head;
1846                         }
1847                 }
1848                 /*
1849                  * Assume IPv4; we don't have TSO/checksum offload support
1850                  * for IPv6 yet.
1851                  */
1852                 ehdrlen = sizeof(struct ether_header);
1853                 m_head = m_pullup(m_head, ehdrlen);
1854                 if (m_head == NULL) {
1855                         *m_headp = NULL;
1856                         return (ENOBUFS);
1857                 }
1858                 eh = mtod(m_head, struct ether_vlan_header *);
1859                 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1860                         ehdrlen = sizeof(struct ether_vlan_header);
1861                         m_head = m_pullup(m_head, ehdrlen);
1862                         if (m_head == NULL) {
1863                                 *m_headp = NULL;
1864                                 return (ENOBUFS);
1865                         }
1866                 }
1867                 m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1868                 if (m_head == NULL) {
1869                         *m_headp = NULL;
1870                         return (ENOBUFS);
1871                 }
1872                 ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1873                 poff = ehdrlen + (ip->ip_hl << 2);
1874                 if (do_tso) {
1875                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1876                         if (m_head == NULL) {
1877                                 *m_headp = NULL;
1878                                 return (ENOBUFS);
1879                         }
1880                         /*
1881          * The pseudo TCP checksum does not include the TCP payload
1882          * length, so the driver recomputes it here to match what the
1883          * hardware expects to see, per Microsoft's Large Send
1884          * specification.
1885                          */
1886                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1887                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
1888                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1889                         /* Keep track of the full header length */
1890                         hdrlen = poff + (th->th_off << 2);
1891                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1892                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1893                         if (m_head == NULL) {
1894                                 *m_headp = NULL;
1895                                 return (ENOBUFS);
1896                         }
1897                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1898                         m_head = m_pullup(m_head, poff + (th->th_off << 2));
1899                         if (m_head == NULL) {
1900                                 *m_headp = NULL;
1901                                 return (ENOBUFS);
1902                         }
1903                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1904                         th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1905                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1906                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1907                         if (m_head == NULL) {
1908                                 *m_headp = NULL;
1909                                 return (ENOBUFS);
1910                         }
1911                         ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1912                 }
1913                 *m_headp = m_head;
1914         }
1915
1916         /*
1917          * Map the packet for DMA
1918          *
1919          * Capture the first descriptor index,
1920          * this descriptor will have the index
1921          * of the EOP which is the only one that
1922          * now gets a DONE bit writeback.
1923          */
1924         first = txr->next_avail_desc;
1925         tx_buffer = &txr->tx_buffers[first];
1926         tx_buffer_mapped = tx_buffer;
1927         map = tx_buffer->map;
1928
1929         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1930             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1931
1932         /*
1933          * There are two types of errors we can (try) to handle:
1934          * - EFBIG means the mbuf chain was too long and bus_dma ran
1935          *   out of segments.  Defragment the mbuf chain and try again.
1936          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1937          *   at this point in time.  Defer sending and try again later.
1938          * All other errors, in particular EINVAL, are fatal and prevent the
1939          * mbuf chain from ever going through.  Drop it and report error.
1940          */
1941         if (error == EFBIG && remap) {
1942                 struct mbuf *m;
1943
1944                 m = m_defrag(*m_headp, M_DONTWAIT);
1945                 if (m == NULL) {
1946                         adapter->mbuf_defrag_failed++;
1947                         m_freem(*m_headp);
1948                         *m_headp = NULL;
1949                         return (ENOBUFS);
1950                 }
1951                 *m_headp = m;
1952
1953                 /* Try it again, but only once */
1954                 remap = 0;
1955                 goto retry;
1956         } else if (error == ENOMEM) {
1957                 adapter->no_tx_dma_setup++;
1958                 return (error);
1959         } else if (error != 0) {
1960                 adapter->no_tx_dma_setup++;
1961                 m_freem(*m_headp);
1962                 *m_headp = NULL;
1963                 return (error);
1964         }
1965
1966         /*
1967         ** Make sure we don't overrun the ring:
1968         ** we need nsegs descriptors plus one for
1969         ** the context descriptor used for the
1970         ** offloads.
1971         */
1972         if ((nsegs + 1) > (txr->tx_avail - 2)) {
1973                 txr->no_desc_avail++;
1974                 bus_dmamap_unload(txr->txtag, map);
1975                 return (ENOBUFS);
1976         }
1977         m_head = *m_headp;
1978
1979         /* Do hardware assists:
1980          * Set up the context descriptor, used
1981          * when any hardware offload is done.
1982          * This includes CSUM, VLAN, and TSO.
1983          * It will use the first descriptor.
1984          */
1985
1986         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1987                 if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1988                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1989                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1990                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1991                 } else
1992                         return (ENXIO);
1993         } else if (igb_tx_ctx_setup(txr, m_head))
1994                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1995
1996         /* Calculate payload length */
1997         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1998             << E1000_ADVTXD_PAYLEN_SHIFT);
1999
2000         /* 82575 needs the queue index added */
2001         if (adapter->hw.mac.type == e1000_82575)
2002                 olinfo_status |= txr->me << 4;
2003
2004         /* Set up our transmit descriptors */
2005         i = txr->next_avail_desc;
2006         for (int j = 0; j < nsegs; j++) {
2007                 bus_size_t seg_len;
2008                 bus_addr_t seg_addr;
2009
2010                 tx_buffer = &txr->tx_buffers[i];
2011                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2012                 seg_addr = segs[j].ds_addr;
2013                 seg_len  = segs[j].ds_len;
2014
2015                 txd->read.buffer_addr = htole64(seg_addr);
2016                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2017                 txd->read.olinfo_status = htole32(olinfo_status);
2018                 last = i;
2019                 if (++i == adapter->num_tx_desc)
2020                         i = 0;
2021                 tx_buffer->m_head = NULL;
2022                 tx_buffer->next_eop = -1;
2023         }
2024
2025         txr->next_avail_desc = i;
2026         txr->tx_avail -= nsegs;
2027         tx_buffer->m_head = m_head;
2028
2029         /*
2030         ** Here we swap the map so the last descriptor,
2031         ** which gets the completion interrupt, has the
2032         ** real map, and the first descriptor gets the
2033         ** unused map from this descriptor.
2034         */
2035         tx_buffer_mapped->map = tx_buffer->map;
2036         tx_buffer->map = map;
2037         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2038
2039         /*
2040          * Last Descriptor of Packet
2041          * needs End Of Packet (EOP)
2042          * and Report Status (RS)
2043          */
2044         txd->read.cmd_type_len |=
2045             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2046         /*
2047          * Keep track in the first buffer which
2048          * descriptor will be written back
2049          */
2050         tx_buffer = &txr->tx_buffers[first];
2051         tx_buffer->next_eop = last;
2052         /* Update the watchdog time early and often */
2053         txr->watchdog_time = ticks;
2054
2055         /*
2056          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2057          * that this frame is available to transmit.
2058          */
2059         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2060             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2061         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2062         ++txr->tx_packets;
2063
2064         return (0);
2065 }
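/*
 * Editor's note: the descriptor loop in igb_xmit() advances a ring index
 * with an explicit wrap and keeps a small slack so the ring never fills
 * completely.  The ring arithmetic in isolation (hypothetical helpers):
 */
#if 0   /* illustrative sketch -- not compiled */
/* Advance a ring index by one descriptor, wrapping at the ring size. */
static inline int
example_ring_next(int idx, int num_desc)
{
        return ((idx + 1 == num_desc) ? 0 : idx + 1);
}

/*
 * The admission check used above: a frame needs nsegs data descriptors
 * plus one context descriptor for offloads, with a slack of two.
 */
static inline int
example_ring_has_room(int tx_avail, int nsegs)
{
        return ((nsegs + 1) <= (tx_avail - 2));
}
#endif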
2066 static void
2067 igb_set_promisc(struct adapter *adapter)
2068 {
2069         struct ifnet    *ifp = adapter->ifp;
2070         struct e1000_hw *hw = &adapter->hw;
2071         u32             reg;
2072
2073         if (adapter->vf_ifp) {
2074                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2075                 return;
2076         }
2077
2078         reg = E1000_READ_REG(hw, E1000_RCTL);
2079         if (ifp->if_flags & IFF_PROMISC) {
2080                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2081                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2082         } else if (ifp->if_flags & IFF_ALLMULTI) {
2083                 reg |= E1000_RCTL_MPE;
2084                 reg &= ~E1000_RCTL_UPE;
2085                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2086         }
2087 }
2088
2089 static void
2090 igb_disable_promisc(struct adapter *adapter)
2091 {
2092         struct e1000_hw *hw = &adapter->hw;
2093         u32             reg;
2094
2095         if (adapter->vf_ifp) {
2096                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2097                 return;
2098         }
2099         reg = E1000_READ_REG(hw, E1000_RCTL);
2100         reg &=  (~E1000_RCTL_UPE);
2101         reg &=  (~E1000_RCTL_MPE);
2102         E1000_WRITE_REG(hw, E1000_RCTL, reg);
2103 }
2104
2105
2106 /*********************************************************************
2107  *  Multicast Update
2108  *
2109  *  This routine is called whenever the multicast address list is updated.
2110  *
2111  **********************************************************************/
2112
2113 static void
2114 igb_set_multi(struct adapter *adapter)
2115 {
2116         struct ifnet    *ifp = adapter->ifp;
2117         struct ifmultiaddr *ifma;
2118         u32 reg_rctl = 0;
2119         u8  *mta;
2120
2121         int mcnt = 0;
2122
2123         IOCTL_DEBUGOUT("igb_set_multi: begin");
2124
2125         mta = adapter->mta;
2126         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2127             MAX_NUM_MULTICAST_ADDRESSES);
2128
2129 #if __FreeBSD_version < 800000
2130         IF_ADDR_LOCK(ifp);
2131 #else
2132         if_maddr_rlock(ifp);
2133 #endif
2134         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2135                 if (ifma->ifma_addr->sa_family != AF_LINK)
2136                         continue;
2137
2138                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2139                         break;
2140
2141                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2142                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2143                 mcnt++;
2144         }
2145 #if __FreeBSD_version < 800000
2146         IF_ADDR_UNLOCK(ifp);
2147 #else
2148         if_maddr_runlock(ifp);
2149 #endif
2150
2151         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2152                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2153                 reg_rctl |= E1000_RCTL_MPE;
2154                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2155         } else
2156                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2157 }
2158
2159
2160 /*********************************************************************
2161  *  Timer routine:
2162  *      This routine checks for link status,
2163  *      updates statistics, and does the watchdog.
2164  *
2165  **********************************************************************/
2166
2167 static void
2168 igb_local_timer(void *arg)
2169 {
2170         struct adapter          *adapter = arg;
2171         device_t                dev = adapter->dev;
2172         struct ifnet            *ifp = adapter->ifp;
2173         struct tx_ring          *txr = adapter->tx_rings;
2174         struct igb_queue        *que = adapter->queues;
2175         int                     hung = 0, busy = 0;
2176
2177
2178         IGB_CORE_LOCK_ASSERT(adapter);
2179
2180         igb_update_link_status(adapter);
2181         igb_update_stats_counters(adapter);
2182
2183         /*
2184         ** Check the status of the TX queues:
2185         **      - centralized, locked handling of OACTIVE
2186         **      - watchdog only if all queues show as hung
2187         */
2188         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2189                 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2190                     (adapter->pause_frames == 0))
2191                         ++hung;
2192                 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2193                         ++busy;
2194                 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2195                         taskqueue_enqueue(que->tq, &que->que_task);
2196         }
2197         if (hung == adapter->num_queues)
2198                 goto timeout;
2199         if (busy == adapter->num_queues)
2200                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2201         else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2202             (busy < adapter->num_queues))
2203                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2204
2205         adapter->pause_frames = 0;
2206         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2207 #ifndef DEVICE_POLLING
2208         /* Schedule all queue interrupts - deadlock protection */
2209         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2210 #endif
2211         return;
2212
2213 timeout:
2214         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2215         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2216             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2217             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2218         device_printf(dev, "TX(%d) desc avail = %d, "
2219             "Next TX to Clean = %d\n",
2220             txr->me, txr->tx_avail, txr->next_to_clean);
2221         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2222         adapter->watchdog_events++;
2223         igb_init_locked(adapter);
2224 }
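/*
 * Editor's note (worked example): with num_queues = 4, igb_local_timer()
 * above resets the adapter only when all four queues report IGB_QUEUE_HUNG
 * during a pause-frame-free interval; if all four are merely DEPLETED it
 * sets IFF_DRV_OACTIVE to push back on the stack, and clears it again as
 * soon as any queue frees descriptors.
 */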
2225
2226 static void
2227 igb_update_link_status(struct adapter *adapter)
2228 {
2229         struct e1000_hw *hw = &adapter->hw;
2230         struct ifnet *ifp = adapter->ifp;
2231         device_t dev = adapter->dev;
2232         struct tx_ring *txr = adapter->tx_rings;
2233         u32 link_check, thstat, ctrl;
2234
2235         link_check = thstat = ctrl = 0;
2236
2237         /* Get the cached link value or read for real */
2238         switch (hw->phy.media_type) {
2239         case e1000_media_type_copper:
2240                 if (hw->mac.get_link_status) {
2241                         /* Do the work to read phy */
2242                         e1000_check_for_link(hw);
2243                         link_check = !hw->mac.get_link_status;
2244                 } else
2245                         link_check = TRUE;
2246                 break;
2247         case e1000_media_type_fiber:
2248                 e1000_check_for_link(hw);
2249                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2250                                  E1000_STATUS_LU);
2251                 break;
2252         case e1000_media_type_internal_serdes:
2253                 e1000_check_for_link(hw);
2254                 link_check = adapter->hw.mac.serdes_has_link;
2255                 break;
2256         /* VF device is type_unknown */
2257         case e1000_media_type_unknown:
2258                 e1000_check_for_link(hw);
2259                 link_check = !hw->mac.get_link_status;
2260                 /* Fall thru */
2261         default:
2262                 break;
2263         }
2264
2265         /* Check for thermal downshift or shutdown */
2266         if (hw->mac.type == e1000_i350) {
2267                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2268                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2269         }
2270
2271         /* Now we check if a transition has happened */
2272         if (link_check && (adapter->link_active == 0)) {
2273                 e1000_get_speed_and_duplex(&adapter->hw, 
2274                     &adapter->link_speed, &adapter->link_duplex);
2275                 if (bootverbose)
2276                         device_printf(dev, "Link is up %d Mbps %s\n",
2277                             adapter->link_speed,
2278                             ((adapter->link_duplex == FULL_DUPLEX) ?
2279                             "Full Duplex" : "Half Duplex"));
2280                 adapter->link_active = 1;
2281                 ifp->if_baudrate = adapter->link_speed * 1000000;
2282                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2283                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2284                         device_printf(dev, "Link: thermal downshift\n");
2285                 /* This can sleep */
2286                 if_link_state_change(ifp, LINK_STATE_UP);
2287         } else if (!link_check && (adapter->link_active == 1)) {
2288                 ifp->if_baudrate = adapter->link_speed = 0;
2289                 adapter->link_duplex = 0;
2290                 if (bootverbose)
2291                         device_printf(dev, "Link is Down\n");
2292                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2293                     (thstat & E1000_THSTAT_PWR_DOWN))
2294                         device_printf(dev, "Link: thermal shutdown\n");
2295                 adapter->link_active = 0;
2296                 /* This can sleep */
2297                 if_link_state_change(ifp, LINK_STATE_DOWN);
2298                 /* Reset queue state */
2299                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2300                         txr->queue_status = IGB_QUEUE_IDLE;
2301         }
2302 }
2303
2304 /*********************************************************************
2305  *
2306  *  This routine disables all traffic on the adapter by issuing a
2307  *  global reset on the MAC and deallocates TX/RX buffers.
2308  *
2309  **********************************************************************/
2310
2311 static void
2312 igb_stop(void *arg)
2313 {
2314         struct adapter  *adapter = arg;
2315         struct ifnet    *ifp = adapter->ifp;
2316         struct tx_ring *txr = adapter->tx_rings;
2317
2318         IGB_CORE_LOCK_ASSERT(adapter);
2319
2320         INIT_DEBUGOUT("igb_stop: begin");
2321
2322         igb_disable_intr(adapter);
2323
2324         callout_stop(&adapter->timer);
2325
2326         /* Tell the stack that the interface is no longer active */
2327         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2328         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2329
2330         /* Disarm watchdog timer. */
2331         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2332                 IGB_TX_LOCK(txr);
2333                 txr->queue_status = IGB_QUEUE_IDLE;
2334                 IGB_TX_UNLOCK(txr);
2335         }
2336
2337         e1000_reset_hw(&adapter->hw);
2338         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2339
2340         e1000_led_off(&adapter->hw);
2341         e1000_cleanup_led(&adapter->hw);
2342 }
2343
2344
2345 /*********************************************************************
2346  *
2347  *  Determine hardware revision.
2348  *
2349  **********************************************************************/
2350 static void
2351 igb_identify_hardware(struct adapter *adapter)
2352 {
2353         device_t dev = adapter->dev;
2354
2355         /* Make sure our PCI config space has the necessary stuff set */
2356         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2357         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2358             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2359                 INIT_DEBUGOUT("Memory Access and/or Bus Master "
2360                     "bits were not set!\n");
2361                 adapter->hw.bus.pci_cmd_word |=
2362                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2363                 pci_write_config(dev, PCIR_COMMAND,
2364                     adapter->hw.bus.pci_cmd_word, 2);
2365         }
2366
2367         /* Save off the information about this board */
2368         adapter->hw.vendor_id = pci_get_vendor(dev);
2369         adapter->hw.device_id = pci_get_device(dev);
2370         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2371         adapter->hw.subsystem_vendor_id =
2372             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2373         adapter->hw.subsystem_device_id =
2374             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2375
2376         /* Set MAC type early for PCI setup */
2377         e1000_set_mac_type(&adapter->hw);
2378
2379         /* Are we a VF device? */
2380         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2381             (adapter->hw.mac.type == e1000_vfadapt_i350))
2382                 adapter->vf_ifp = 1;
2383         else
2384                 adapter->vf_ifp = 0;
2385 }
2386
2387 static int
2388 igb_allocate_pci_resources(struct adapter *adapter)
2389 {
2390         device_t        dev = adapter->dev;
2391         int             rid;
2392
2393         rid = PCIR_BAR(0);
2394         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2395             &rid, RF_ACTIVE);
2396         if (adapter->pci_mem == NULL) {
2397                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2398                 return (ENXIO);
2399         }
2400         adapter->osdep.mem_bus_space_tag =
2401             rman_get_bustag(adapter->pci_mem);
2402         adapter->osdep.mem_bus_space_handle =
2403             rman_get_bushandle(adapter->pci_mem);
2404         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2405
2406         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2407
2408         /* This will setup either MSI/X or MSI */
2409         adapter->msix = igb_setup_msix(adapter);
2410         adapter->hw.back = &adapter->osdep;
2411
2412         return (0);
2413 }
2414
2415 /*********************************************************************
2416  *
2417  *  Setup the Legacy or MSI Interrupt handler
2418  *
2419  **********************************************************************/
2420 static int
2421 igb_allocate_legacy(struct adapter *adapter)
2422 {
2423         device_t                dev = adapter->dev;
2424         struct igb_queue        *que = adapter->queues;
2425         struct tx_ring          *txr = adapter->tx_rings;
2426         int                     error, rid = 0;
2427
2428         /* Turn off all interrupts */
2429         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2430
2431         /* MSI RID is 1 */
2432         if (adapter->msix == 1)
2433                 rid = 1;
2434
2435         /* We allocate a single interrupt resource */
2436         adapter->res = bus_alloc_resource_any(dev,
2437             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2438         if (adapter->res == NULL) {
2439                 device_printf(dev, "Unable to allocate bus resource: "
2440                     "interrupt\n");
2441                 return (ENXIO);
2442         }
2443
2444 #if __FreeBSD_version >= 800000
2445         TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2446 #endif
2447
2448         /*
2449          * Try allocating a fast interrupt and the associated deferred
2450          * processing contexts.
2451          */
2452         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2453         /* Make tasklet for deferred link handling */
2454         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2455         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2456             taskqueue_thread_enqueue, &que->tq);
2457         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2458             device_get_nameunit(adapter->dev));
2459         if ((error = bus_setup_intr(dev, adapter->res,
2460             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2461             adapter, &adapter->tag)) != 0) {
2462                 device_printf(dev, "Failed to register fast interrupt "
2463                             "handler: %d\n", error);
2464                 taskqueue_free(que->tq);
2465                 que->tq = NULL;
2466                 return (error);
2467         }
2468
2469         return (0);
2470 }
2471
2472
2473 /*********************************************************************
2474  *
2475  *  Setup the MSIX Queue Interrupt handlers: 
2476  *
2477  **********************************************************************/
2478 static int
2479 igb_allocate_msix(struct adapter *adapter)
2480 {
2481         device_t                dev = adapter->dev;
2482         struct igb_queue        *que = adapter->queues;
2483         int                     error, rid, vector = 0;
2484
2485         /* Be sure to start with all interrupts disabled */
2486         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2487         E1000_WRITE_FLUSH(&adapter->hw);
2488
2489         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2490                 rid = vector + 1;
2491                 que->res = bus_alloc_resource_any(dev,
2492                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2493                 if (que->res == NULL) {
2494                         device_printf(dev,
2495                             "Unable to allocate bus resource: "
2496                             "MSIX Queue Interrupt\n");
2497                         return (ENXIO);
2498                 }
2499                 error = bus_setup_intr(dev, que->res,
2500                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2501                     igb_msix_que, que, &que->tag);
2502                 if (error) {
2503                         que->res = NULL;
2504                         device_printf(dev, "Failed to register Queue handler\n");
2505                         return (error);
2506                 }
2507 #if __FreeBSD_version >= 800504
2508                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2509 #endif
2510                 que->msix = vector;
2511                 if (adapter->hw.mac.type == e1000_82575)
2512                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2513                 else
2514                         que->eims = 1 << vector;
2515                 /*
2516                 ** Bind the msix vector, and thus the
2517                 ** rings to the corresponding cpu.
2518                 */
2519                 if (adapter->num_queues > 1) {
2520                         if (igb_last_bind_cpu < 0)
2521                                 igb_last_bind_cpu = CPU_FIRST();
2522                         bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2523                         device_printf(dev,
2524                                 "Bound queue %d to cpu %d\n",
2525                                 i, igb_last_bind_cpu);
2526                         igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2527                         igb_last_bind_cpu = igb_last_bind_cpu % mp_ncpus;
2528                 }
2529 #if __FreeBSD_version >= 800000
2530                 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2531                     que->txr);
2532 #endif
2533                 /* Make tasklet for deferred handling */
2534                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2535                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2536                     taskqueue_thread_enqueue, &que->tq);
2537                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2538                     device_get_nameunit(adapter->dev));
2539         }
2540
2541         /* And Link */
2542         rid = vector + 1;
2543         adapter->res = bus_alloc_resource_any(dev,
2544             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2545         if (adapter->res == NULL) {
2546                 device_printf(dev,
2547                     "Unable to allocate bus resource: "
2548                     "MSIX Link Interrupt\n");
2549                 return (ENXIO);
2550         }
2551         if ((error = bus_setup_intr(dev, adapter->res,
2552             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2553             igb_msix_link, adapter, &adapter->tag)) != 0) {
2554                 device_printf(dev, "Failed to register Link handler\n");
2555                 return (error);
2556         }
2557 #if __FreeBSD_version >= 800504
2558         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2559 #endif
2560         adapter->linkvec = vector;
2561
2562         return (0);
2563 }
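/*
 * Editor's note: the binding logic above spreads MSI-X vectors round-robin
 * over the online CPUs.  The iteration pattern, isolated (CPU_FIRST(),
 * CPU_NEXT() and mp_ncpus are the stock SMP helpers used above; the
 * function name is hypothetical):
 */
#if 0   /* illustrative sketch -- not compiled */
static int example_last_bind_cpu = -1;

static int
example_next_bind_cpu(void)
{
        int cpu;

        if (example_last_bind_cpu < 0)
                example_last_bind_cpu = CPU_FIRST();
        cpu = example_last_bind_cpu;    /* bind the caller to this CPU */
        example_last_bind_cpu =
            CPU_NEXT(example_last_bind_cpu) % mp_ncpus;
        return (cpu);
}
#endif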
2564
2565
2566 static void
2567 igb_configure_queues(struct adapter *adapter)
2568 {
2569         struct  e1000_hw        *hw = &adapter->hw;
2570         struct  igb_queue       *que;
2571         u32                     tmp, ivar = 0, newitr = 0;
2572
2573         /* First turn on RSS capability */
2574         if (adapter->hw.mac.type != e1000_82575)
2575                 E1000_WRITE_REG(hw, E1000_GPIE,
2576                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2577                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2578
2579         /* Turn on MSIX */
2580         switch (adapter->hw.mac.type) {
2581         case e1000_82580:
2582         case e1000_i350:
2583         case e1000_i210:
2584         case e1000_i211:
2585         case e1000_vfadapt:
2586         case e1000_vfadapt_i350:
2587                 /* RX entries */
2588                 for (int i = 0; i < adapter->num_queues; i++) {
2589                         u32 index = i >> 1;
2590                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2591                         que = &adapter->queues[i];
2592                         if (i & 1) {
2593                                 ivar &= 0xFF00FFFF;
2594                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2595                         } else {
2596                                 ivar &= 0xFFFFFF00;
2597                                 ivar |= que->msix | E1000_IVAR_VALID;
2598                         }
2599                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2600                 }
2601                 /* TX entries */
2602                 for (int i = 0; i < adapter->num_queues; i++) {
2603                         u32 index = i >> 1;
2604                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2605                         que = &adapter->queues[i];
2606                         if (i & 1) {
2607                                 ivar &= 0x00FFFFFF;
2608                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2609                         } else {
2610                                 ivar &= 0xFFFF00FF;
2611                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2612                         }
2613                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2614                         adapter->que_mask |= que->eims;
2615                 }
2616
2617                 /* And for the link interrupt */
2618                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2619                 adapter->link_mask = 1 << adapter->linkvec;
2620                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2621                 break;
2622         case e1000_82576:
2623                 /* RX entries */
2624                 for (int i = 0; i < adapter->num_queues; i++) {
2625                         u32 index = i & 0x7; /* Each IVAR has two entries */
2626                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2627                         que = &adapter->queues[i];
2628                         if (i < 8) {
2629                                 ivar &= 0xFFFFFF00;
2630                                 ivar |= que->msix | E1000_IVAR_VALID;
2631                         } else {
2632                                 ivar &= 0xFF00FFFF;
2633                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2634                         }
2635                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2636                         adapter->que_mask |= que->eims;
2637                 }
2638                 /* TX entries */
2639                 for (int i = 0; i < adapter->num_queues; i++) {
2640                         u32 index = i & 0x7; /* Each IVAR has two entries */
2641                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2642                         que = &adapter->queues[i];
2643                         if (i < 8) {
2644                                 ivar &= 0xFFFF00FF;
2645                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2646                         } else {
2647                                 ivar &= 0x00FFFFFF;
2648                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2649                         }
2650                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2651                         adapter->que_mask |= que->eims;
2652                 }
2653
2654                 /* And for the link interrupt */
2655                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2656                 adapter->link_mask = 1 << adapter->linkvec;
2657                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2658                 break;
2659
2660         case e1000_82575:
2661                 /* Enable MSI-X support */
2662                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2663                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2664                 /* Auto-Mask interrupts upon ICR read. */
2665                 tmp |= E1000_CTRL_EXT_EIAME;
2666                 tmp |= E1000_CTRL_EXT_IRCA;
2667                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2668
2669                 /* Queues */
2670                 for (int i = 0; i < adapter->num_queues; i++) {
2671                         que = &adapter->queues[i];
2672                         tmp = E1000_EICR_RX_QUEUE0 << i;
2673                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2674                         que->eims = tmp;
2675                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2676                             i, que->eims);
2677                         adapter->que_mask |= que->eims;
2678                 }
2679
2680                 /* Link */
2681                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2682                     E1000_EIMS_OTHER);
2683                 adapter->link_mask |= E1000_EIMS_OTHER;        /* FALLTHROUGH */
2684         default:
2685                 break;
2686         }
2687
2688         /* Set the starting interrupt rate */
2689         if (igb_max_interrupt_rate > 0)
2690                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2691
2692         if (hw->mac.type == e1000_82575)
2693                 newitr |= newitr << 16;
2694         else
2695                 newitr |= E1000_EITR_CNT_IGNR;
2696
2697         for (int i = 0; i < adapter->num_queues; i++) {
2698                 que = &adapter->queues[i];
2699                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2700         }
2701
2702         return;
2703 }
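/*
 * Editor's note: on the 82580/i350-class parts handled above, each 32-bit
 * IVAR register carries entries for two queues, with the RX vector in the
 * low byte of each 16-bit half and the TX vector in the high byte (the
 * 82576 instead splits queues 0-7 and 8-15 across register halves).  The
 * 82580-style lane selection, condensed (hypothetical helper):
 */
#if 0   /* illustrative sketch -- not compiled */
/* Bit shift of queue i's entry within IVAR register (i >> 1). */
static inline int
example_ivar_shift(int queue, int is_tx)
{
        int shift = (queue & 1) ? 16 : 0;       /* odd queues: upper half */

        if (is_tx)
                shift += 8;                     /* TX byte sits above RX */
        return (shift);
}
#endif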
2704
2705
2706 static void
2707 igb_free_pci_resources(struct adapter *adapter)
2708 {
2709         struct          igb_queue *que = adapter->queues;
2710         device_t        dev = adapter->dev;
2711         int             rid;
2712
2713         /*
2714         ** There is a slight possibility of a failure mode
2715         ** in attach that will result in entering this function
2716         ** before interrupt resources have been initialized, and
2717         ** in that case we do not want to execute the loops below.
2718         ** We can detect this reliably by the state of the adapter's
2719         ** res pointer.
2720         */
2721         if (adapter->res == NULL)
2722                 goto mem;
2723
2724         /*
2725          * First release all the interrupt resources:
2726          */
2727         for (int i = 0; i < adapter->num_queues; i++, que++) {
2728                 rid = que->msix + 1;
2729                 if (que->tag != NULL) {
2730                         bus_teardown_intr(dev, que->res, que->tag);
2731                         que->tag = NULL;
2732                 }
2733                 if (que->res != NULL)
2734                         bus_release_resource(dev,
2735                             SYS_RES_IRQ, rid, que->res);
2736         }
2737
2738         /* Clean the Legacy or Link interrupt last */
2739         if (adapter->linkvec) /* we are doing MSIX */
2740                 rid = adapter->linkvec + 1;
2741         else
2742                 rid = (adapter->msix != 0) ? 1 : 0;
2743
2744         que = adapter->queues;
2745         if (adapter->tag != NULL) {
2746                 taskqueue_drain(que->tq, &adapter->link_task);
2747                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2748                 adapter->tag = NULL;
2749         }
2750         if (adapter->res != NULL)
2751                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2752
2753         for (int i = 0; i < adapter->num_queues; i++, que++) {
2754                 if (que->tq != NULL) {
2755 #if __FreeBSD_version >= 800000
2756                         taskqueue_drain(que->tq, &que->txr->txq_task);
2757 #endif
2758                         taskqueue_drain(que->tq, &que->que_task);
2759                         taskqueue_free(que->tq);
2760                 }
2761         }
2762 mem:
2763         if (adapter->msix)
2764                 pci_release_msi(dev);
2765
2766         if (adapter->msix_mem != NULL)
2767                 bus_release_resource(dev, SYS_RES_MEMORY,
2768                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2769
2770         if (adapter->pci_mem != NULL)
2771                 bus_release_resource(dev, SYS_RES_MEMORY,
2772                     PCIR_BAR(0), adapter->pci_mem);
2773
2774 }
2775
2776 /*
2777  * Set up either MSI/X or MSI
2778  */
2779 static int
2780 igb_setup_msix(struct adapter *adapter)
2781 {
2782         device_t dev = adapter->dev;
2783         int rid, want, queues, msgs, maxqueues;
2784
2785         /* tuneable override */
2786         if (igb_enable_msix == 0)
2787                 goto msi;
2788
2789         /* First try MSI/X */
2790         rid = PCIR_BAR(IGB_MSIX_BAR);
2791         adapter->msix_mem = bus_alloc_resource_any(dev,
2792             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2793         if (!adapter->msix_mem) {
2794                 /* May not be enabled */
2795                 device_printf(adapter->dev,
2796                     "Unable to map MSIX table\n");
2797                 goto msi;
2798         }
2799
2800         msgs = pci_msix_count(dev); 
2801         if (msgs == 0) { /* system has msix disabled */
2802                 bus_release_resource(dev, SYS_RES_MEMORY,
2803                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2804                 adapter->msix_mem = NULL;
2805                 goto msi;
2806         }
2807
2808         /* Figure out a reasonable auto config value */
2809         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2810
2811         /* Manual override */
2812         if (igb_num_queues != 0)
2813                 queues = igb_num_queues;
2814
2815         /* Sanity check based on HW */
2816         switch (adapter->hw.mac.type) {
2817                 case e1000_82575:
2818                         maxqueues = 4;
2819                         break;
2820                 case e1000_82576:
2821                 case e1000_82580:
2822                 case e1000_i350:
2823                         maxqueues = 8;
2824                         break;
2825                 case e1000_i210:
2826                         maxqueues = 4;
2827                         break;
2828                 case e1000_i211:
2829                         maxqueues = 2;
2830                         break;
2831                 default:  /* VF interfaces */
2832                         maxqueues = 1;
2833                         break;
2834         }
2835         if (queues > maxqueues)
2836                 queues = maxqueues;
2837
2838         /*
2839         ** One vector (RX/TX pair) per queue
2840         ** plus an additional one for the link interrupt
2841         */
2842         want = queues + 1;
2843         if (msgs >= want)
2844                 msgs = want;
2845         else {
2846                 device_printf(adapter->dev,
2847                     "MSIX Configuration Problem, "
2848                     "%d vectors available, but %d needed!\n",
2849                     msgs, want);
2850                 return (0);
2851         }
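             /*
             ** For example, on an 82576 with 4 CPUs and 10 MSIX
             ** messages reported: queues = 4, want = 5 (one vector per
             ** queue plus one for link), and 5 vectors are allocated.
             ** Had fewer than 5 messages been available, the check
             ** above would have printed the warning and returned 0.
             */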
2852         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2853                 device_printf(adapter->dev,
2854                     "Using MSIX interrupts with %d vectors\n", msgs);
2855                 adapter->num_queues = queues;
2856                 return (msgs);
2857         }
2858 msi:
2859         msgs = pci_msi_count(dev);
2860         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2861                 device_printf(adapter->dev, "Using MSI interrupt\n");
2862                 return (msgs);
2863         }
2864         return (0);
2865 }
2866
2867 /*********************************************************************
2868  *
2869  *  Set up a fresh starting state
2870  *
2871  **********************************************************************/
2872 static void
2873 igb_reset(struct adapter *adapter)
2874 {
2875         device_t        dev = adapter->dev;
2876         struct e1000_hw *hw = &adapter->hw;
2877         struct e1000_fc_info *fc = &hw->fc;
2878         struct ifnet    *ifp = adapter->ifp;
2879         u32             pba = 0;
2880         u16             hwm;
2881
2882         INIT_DEBUGOUT("igb_reset: begin");
2883
2884         /* Let the firmware know the OS is in control */
2885         igb_get_hw_control(adapter);
2886
2887         /*
2888          * Packet Buffer Allocation (PBA)
2889          * Writing PBA sets the receive portion of the buffer;
2890          * the remainder is used for the transmit buffer.
2891          */
2892         switch (hw->mac.type) {
2893         case e1000_82575:
2894                 pba = E1000_PBA_32K;
2895                 break;
2896         case e1000_82576:
2897         case e1000_vfadapt:
2898                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2899                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2900                 break;
2901         case e1000_82580:
2902         case e1000_i350:
2903         case e1000_vfadapt_i350:
2904                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2905                 pba = e1000_rxpbs_adjust_82580(pba);
2906                 break;
2907         case e1000_i210:
2908         case e1000_i211:
2909                 pba = E1000_PBA_34K;
2910         default:
2911                 break;
2912         }
2913
2914         /* Special needs in case of Jumbo frames */
2915         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2916                 u32 tx_space, min_tx, min_rx;
2917                 pba = E1000_READ_REG(hw, E1000_PBA);
2918                 tx_space = pba >> 16;
2919                 pba &= 0xffff;
2920                 min_tx = (adapter->max_frame_size +
2921                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2922                 min_tx = roundup2(min_tx, 1024);
2923                 min_tx >>= 10;
2924                 min_rx = adapter->max_frame_size;
2925                 min_rx = roundup2(min_rx, 1024);
2926                 min_rx >>= 10;
2927                 if (tx_space < min_tx &&
2928                     ((min_tx - tx_space) < pba)) {
2929                         pba = pba - (min_tx - tx_space);
2930                         /*
2931                          * if short on rx space, rx wins
2932                          * and must trump tx adjustment
2933                          */
2934                         if (pba < min_rx)
2935                                 pba = min_rx;
2936                 }
2937                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2938         }
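             /*
             ** A rough example, assuming a 9000-byte MTU (max frame of
             ** about 9018 bytes): min_tx = (9018 + 16 - 4) * 2, rounded
             ** up to 1K and scaled to KB = 18, and min_rx = 9. If the
             ** current TX share is under 18KB the shortfall is taken
             ** from the RX share, but never below the 9KB RX minimum.
             */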
2939
2940         INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
2941
2942         /*
2943          * These parameters control the automatic generation (Tx) and
2944          * response (Rx) to Ethernet PAUSE frames.
2945          * - High water mark should allow for at least two frames to be
2946          *   received after sending an XOFF.
2947          * - Low water mark works best when it is very near the high water mark.
2948          *   This allows the receiver to restart by sending XON when it has
2949          *   drained a bit.
2950          */
2951         hwm = min(((pba << 10) * 9 / 10),
2952             ((pba << 10) - 2 * adapter->max_frame_size));
2953
2954         if (hw->mac.type < e1000_82576) {
2955                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2956                 fc->low_water = fc->high_water - 8;
2957         } else {
2958                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2959                 fc->low_water = fc->high_water - 16;
2960         }
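             /*
             ** For example, with pba = 32 (KB) and a 1522-byte max
             ** frame: hwm = min(32768 * 9 / 10, 32768 - 2 * 1522)
             ** = 29491. On 82576 and later this gives high_water =
             ** 29491 & 0xFFF0 = 29488 and low_water = 29472.
             */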
2961
2962         fc->pause_time = IGB_FC_PAUSE_TIME;
2963         fc->send_xon = TRUE;
2964         if (adapter->fc)
2965                 fc->requested_mode = adapter->fc;
2966         else
2967                 fc->requested_mode = e1000_fc_default;
2968
2969         /* Issue a global reset */
2970         e1000_reset_hw(hw);
2971         E1000_WRITE_REG(hw, E1000_WUC, 0);
2972
2973         if (e1000_init_hw(hw) < 0)
2974                 device_printf(dev, "Hardware Initialization Failed\n");
2975
2976         /* Setup DMA Coalescing */
2977         if ((hw->mac.type > e1000_82580) &&
2978             (hw->mac.type != e1000_i211)) {
2979                 u32 dmac;
2980                 u32 reg = ~E1000_DMACR_DMAC_EN;
2981
2982                 if (adapter->dmac == 0) { /* Disabling it */
2983                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
2984                         goto reset_out;
2985                 }
2986
2987                 /* Set starting thresholds */
2988                 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2989                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2990
2991                 hwm = 64 * pba - adapter->max_frame_size / 16;
2992                 if (hwm < 64 * (pba - 6))
2993                         hwm = 64 * (pba - 6);
2994                 reg = E1000_READ_REG(hw, E1000_FCRTC);
2995                 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2996                 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2997                     & E1000_FCRTC_RTH_COAL_MASK);
2998                 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2999
3001                 dmac = pba - adapter->max_frame_size / 512;
3002                 if (dmac < pba - 10)
3003                         dmac = pba - 10;
3004                 reg = E1000_READ_REG(hw, E1000_DMACR);
3005                 reg &= ~E1000_DMACR_DMACTHR_MASK;
3006                 reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3007                     & E1000_DMACR_DMACTHR_MASK);
3008                 /* transition to L0s or L1 if available */
3009                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3010                 /* timer = value in adapter->dmac in 32usec intervals */
3011                 reg |= (adapter->dmac >> 5);
3012                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3013
3014                 /* Set the interval before transition */
3015                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3016                 reg |= 0x80000004;
3017                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3018
3019                 /* free space in tx packet buffer to wake from DMA coal */
3020                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
3021                     (20480 - (2 * adapter->max_frame_size)) >> 6);
3022
3023                 /* make low power state decision controlled by DMA coal */
3024                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3025                 reg &= ~E1000_PCIEMISC_LX_DECISION;
3026                 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3027                 device_printf(dev, "DMA Coalescing enabled\n");
3028
3029         } else if (hw->mac.type == e1000_82580) {
3030                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3031                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3032                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3033                     reg & ~E1000_PCIEMISC_LX_DECISION);
3034         }
3035
3036 reset_out:
3037         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3038         e1000_get_phy_info(hw);
3039         e1000_check_for_link(hw);
3040         return;
3041 }
3042
3043 /*********************************************************************
3044  *
3045  *  Set up the networking device structure and register an interface.
3046  *
3047  **********************************************************************/
3048 static int
3049 igb_setup_interface(device_t dev, struct adapter *adapter)
3050 {
3051         struct ifnet   *ifp;
3052
3053         INIT_DEBUGOUT("igb_setup_interface: begin");
3054
3055         ifp = adapter->ifp = if_alloc(IFT_ETHER);
3056         if (ifp == NULL) {
3057                 device_printf(dev, "cannot allocate ifnet structure\n");
3058                 return (-1);
3059         }
3060         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3061         ifp->if_init =  igb_init;
3062         ifp->if_softc = adapter;
3063         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3064         ifp->if_ioctl = igb_ioctl;
3065 #if __FreeBSD_version >= 800000
3066         ifp->if_transmit = igb_mq_start;
3067         ifp->if_qflush = igb_qflush;
3068 #else
3069         ifp->if_start = igb_start;
3070         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3071         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3072         IFQ_SET_READY(&ifp->if_snd);
3073 #endif
3074
3075         ether_ifattach(ifp, adapter->hw.mac.addr);
3076
3077         ifp->if_capabilities = ifp->if_capenable = 0;
3078
3079         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3080         ifp->if_capabilities |= IFCAP_TSO4;
3081         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3082         ifp->if_capenable = ifp->if_capabilities;
3083
3084         /* Advertise LRO capability, but leave it disabled by default */
3085         ifp->if_capabilities |= IFCAP_LRO;
3086
3087 #ifdef DEVICE_POLLING
3088         ifp->if_capabilities |= IFCAP_POLLING;
3089 #endif
3090
3091         /*
3092          * Tell the upper layer(s) we
3093          * support full VLAN capability.
3094          */
3095         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3096         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3097                              |  IFCAP_VLAN_HWTSO
3098                              |  IFCAP_VLAN_MTU;
3099         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3100                           |  IFCAP_VLAN_HWTSO
3101                           |  IFCAP_VLAN_MTU;
3102
3103         /*
3104         ** Don't turn this on by default: if vlans are
3105         ** created on another pseudo device (e.g. lagg),
3106         ** vlan events are not passed thru, breaking
3107         ** operation, but with HW FILTER off it works. If
3108         ** using vlans directly on the igb driver you can
3109         ** enable this and get full hardware tag filtering.
3110         */
3111         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
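             /*
             ** The filter can typically be toggled from userland with
             ** ifconfig(8), e.g. "ifconfig igb0 vlanhwfilter" or
             ** "ifconfig igb0 -vlanhwfilter".
             */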
3112
3113         /*
3114          * Specify the media types supported by this adapter and register
3115          * callbacks to update media and link information
3116          */
3117         ifmedia_init(&adapter->media, IFM_IMASK,
3118             igb_media_change, igb_media_status);
3119         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3120             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3121                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
3122                             0, NULL);
3123                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3124         } else {
3125                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3126                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3127                             0, NULL);
3128                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3129                             0, NULL);
3130                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3131                             0, NULL);
3132                 if (adapter->hw.phy.type != e1000_phy_ife) {
3133                         ifmedia_add(&adapter->media,
3134                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3135                         ifmedia_add(&adapter->media,
3136                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3137                 }
3138         }
3139         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3140         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3141         return (0);
3142 }
3143
3144
3145 /*
3146  * Manage DMA'able memory.
3147  */
3148 static void
3149 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3150 {
3151         if (error)
3152                 return;
3153         *(bus_addr_t *) arg = segs[0].ds_addr;
3154 }
3155
3156 static int
3157 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3158         struct igb_dma_alloc *dma, int mapflags)
3159 {
3160         int error;
3161
3162         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3163                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
3164                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3165                                 BUS_SPACE_MAXADDR,      /* highaddr */
3166                                 NULL, NULL,             /* filter, filterarg */
3167                                 size,                   /* maxsize */
3168                                 1,                      /* nsegments */
3169                                 size,                   /* maxsegsize */
3170                                 0,                      /* flags */
3171                                 NULL,                   /* lockfunc */
3172                                 NULL,                   /* lockarg */
3173                                 &dma->dma_tag);
3174         if (error) {
3175                 device_printf(adapter->dev,
3176                     "%s: bus_dma_tag_create failed: %d\n",
3177                     __func__, error);
3178                 goto fail_0;
3179         }
3180
3181         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3182             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3183         if (error) {
3184                 device_printf(adapter->dev,
3185                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3186                     __func__, (uintmax_t)size, error);
3187                 goto fail_1;    /* tag created, but no memory allocated */
3188         }
3189
3190         dma->dma_paddr = 0;
3191         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3192             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3193         if (error || dma->dma_paddr == 0) {
3194                 device_printf(adapter->dev,
3195                     "%s: bus_dmamap_load failed: %d\n",
3196                     __func__, error);
3197                 goto fail_3;
3198         }
3199
3200         return (0);
3201
3202 fail_3:
3203         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3204 fail_2:
3205         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
     fail_1:
3206         bus_dma_tag_destroy(dma->dma_tag);
3207 fail_0:
3208         dma->dma_map = NULL;
3209         dma->dma_tag = NULL;
3210
3211         return (error);
3212 }
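     /*
      * A minimal usage sketch (hypothetical caller; the size shown is
      * for illustration only): allocate a descriptor-sized DMA area,
      * use dma.dma_vaddr (KVA) and dma.dma_paddr (bus address), then
      * release it with igb_dma_free().
      *
      *      struct igb_dma_alloc dma;
      *      bus_size_t sz = roundup2(256 * sizeof(union e1000_adv_tx_desc),
      *          IGB_DBA_ALIGN);
      *
      *      if (igb_dma_malloc(adapter, sz, &dma, BUS_DMA_NOWAIT) == 0) {
      *              ... use dma.dma_vaddr / dma.dma_paddr ...
      *              igb_dma_free(adapter, &dma);
      *      }
      */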
3213
3214 static void
3215 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3216 {
3217         if (dma->dma_tag == NULL)
3218                 return;
3219         if (dma->dma_map != NULL) {
3220                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3221                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3222                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3223                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3224                 dma->dma_map = NULL;
3225         }
3226         bus_dma_tag_destroy(dma->dma_tag);
3227         dma->dma_tag = NULL;
3228 }
3229
3230
3231 /*********************************************************************
3232  *
3233  *  Allocate memory for the transmit and receive rings, and then
3234  *  the descriptors associated with each; called only once at attach.
3235  *
3236  **********************************************************************/
3237 static int
3238 igb_allocate_queues(struct adapter *adapter)
3239 {
3240         device_t dev = adapter->dev;
3241         struct igb_queue        *que = NULL;
3242         struct tx_ring          *txr = NULL;
3243         struct rx_ring          *rxr = NULL;
3244         int rsize, tsize, error = E1000_SUCCESS;
3245         int txconf = 0, rxconf = 0;
3246
3247         /* First allocate the top level queue structs */
3248         if (!(adapter->queues =
3249             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3250             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3251                 device_printf(dev, "Unable to allocate queue memory\n");
3252                 error = ENOMEM;
3253                 goto fail;
3254         }
3255
3256         /* Next allocate the TX ring struct memory */
3257         if (!(adapter->tx_rings =
3258             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3259             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3260                 device_printf(dev, "Unable to allocate TX ring memory\n");
3261                 error = ENOMEM;
3262                 goto tx_fail;
3263         }
3264
3265         /* Now allocate the RX */
3266         if (!(adapter->rx_rings =
3267             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3268             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3269                 device_printf(dev, "Unable to allocate RX ring memory\n");
3270                 error = ENOMEM;
3271                 goto rx_fail;
3272         }
3273
3274         tsize = roundup2(adapter->num_tx_desc *
3275             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3276         /*
3277          * Now set up the TX queues; txconf is needed to handle the
3278          * possibility that things fail midcourse, in which case we
3279          * need to undo the memory allocations gracefully.
3280          */
3281         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3282                 /* Set up some basics */
3283                 txr = &adapter->tx_rings[i];
3284                 txr->adapter = adapter;
3285                 txr->me = i;
3286
3287                 /* Initialize the TX lock */
3288                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3289                     device_get_nameunit(dev), txr->me);
3290                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3291
3292                 if (igb_dma_malloc(adapter, tsize,
3293                         &txr->txdma, BUS_DMA_NOWAIT)) {
3294                         device_printf(dev,
3295                             "Unable to allocate TX Descriptor memory\n");
3296                         error = ENOMEM;
3297                         goto err_tx_desc;
3298                 }
3299                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3300                 bzero((void *)txr->tx_base, tsize);
3301
3302                 /* Now allocate transmit buffers for the ring */
3303                 if (igb_allocate_transmit_buffers(txr)) {
3304                         device_printf(dev,
3305                             "Critical Failure setting up transmit buffers\n");
3306                         error = ENOMEM;
3307                         goto err_tx_desc;
3308                 }
3309 #if __FreeBSD_version >= 800000
3310                 /* Allocate a buf ring */
3311                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3312                     M_WAITOK, &txr->tx_mtx);
3313 #endif
3314         }
3315
3316         /*
3317          * Next the RX queues...
3318          */ 
3319         rsize = roundup2(adapter->num_rx_desc *
3320             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3321         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3322                 rxr = &adapter->rx_rings[i];
3323                 rxr->adapter = adapter;
3324                 rxr->me = i;
3325
3326                 /* Initialize the RX lock */
3327                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3328                     device_get_nameunit(dev), rxr->me);
3329                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3330
3331                 if (igb_dma_malloc(adapter, rsize,
3332                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3333                         device_printf(dev,
3334                             "Unable to allocate RX Descriptor memory\n");
3335                         error = ENOMEM;
3336                         goto err_rx_desc;
3337                 }
3338                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3339                 bzero((void *)rxr->rx_base, rsize);
3340
3341                 /* Allocate receive buffers for the ring */
3342                 if (igb_allocate_receive_buffers(rxr)) {
3343                         device_printf(dev,
3344                             "Critical Failure setting up receive buffers\n");
3345                         error = ENOMEM;
3346                         goto err_rx_desc;
3347                 }
3348         }
3349
3350         /*
3351         ** Finally set up the queue holding structs
3352         */
3353         for (int i = 0; i < adapter->num_queues; i++) {
3354                 que = &adapter->queues[i];
3355                 que->adapter = adapter;
3356                 que->txr = &adapter->tx_rings[i];
3357                 que->rxr = &adapter->rx_rings[i];
3358         }
3359
3360         return (0);
3361
3362 err_rx_desc:
3363         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3364                 igb_dma_free(adapter, &rxr->rxdma);
3365 err_tx_desc:
3366         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3367                 igb_dma_free(adapter, &txr->txdma);
3368         free(adapter->rx_rings, M_DEVBUF);
3369 rx_fail:
3370 #if __FreeBSD_version >= 800000
3371         if (txr != NULL && txr->br != NULL)
                     buf_ring_free(txr->br, M_DEVBUF);
3372 #endif
3373         free(adapter->tx_rings, M_DEVBUF);
3374 tx_fail:
3375         free(adapter->queues, M_DEVBUF);
3376 fail:
3377         return (error);
3378 }
3379
3380 /*********************************************************************
3381  *
3382  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3383  *  the information needed to transmit a packet on the wire. This is
3384  *  called only once at attach; setup is done on every reset.
3385  *
3386  **********************************************************************/
3387 static int
3388 igb_allocate_transmit_buffers(struct tx_ring *txr)
3389 {
3390         struct adapter *adapter = txr->adapter;
3391         device_t dev = adapter->dev;
3392         struct igb_tx_buffer *txbuf;
3393         int error, i;
3394
3395         /*
3396          * Setup DMA descriptor areas.
3397          */
3398         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3399                                1, 0,                    /* alignment, bounds */
3400                                BUS_SPACE_MAXADDR,       /* lowaddr */
3401                                BUS_SPACE_MAXADDR,       /* highaddr */
3402                                NULL, NULL,              /* filter, filterarg */
3403                                IGB_TSO_SIZE,            /* maxsize */
3404                                IGB_MAX_SCATTER,         /* nsegments */
3405                                PAGE_SIZE,               /* maxsegsize */
3406                                0,                       /* flags */
3407                                NULL,                    /* lockfunc */
3408                                NULL,                    /* lockfuncarg */
3409                                &txr->txtag))) {
3410                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3411                 goto fail;
3412         }
3413
3414         if (!(txr->tx_buffers =
3415             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3416             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3417                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3418                 error = ENOMEM;
3419                 goto fail;
3420         }
3421
3422         /* Create the descriptor buffer dma maps */
3423         txbuf = txr->tx_buffers;
3424         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3425                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3426                 if (error != 0) {
3427                         device_printf(dev, "Unable to create TX DMA map\n");
3428                         goto fail;
3429                 }
3430         }
3431
3432         return (0);
3433 fail:
3434         /* We free all, it handles case where we are in the middle */
3435         igb_free_transmit_structures(adapter);
3436         return (error);
3437 }
3438
3439 /*********************************************************************
3440  *
3441  *  Initialize a transmit ring.
3442  *
3443  **********************************************************************/
3444 static void
3445 igb_setup_transmit_ring(struct tx_ring *txr)
3446 {
3447         struct adapter *adapter = txr->adapter;
3448         struct igb_tx_buffer *txbuf;
3449         int i;
3450 #ifdef DEV_NETMAP
3451         struct netmap_adapter *na = NA(adapter->ifp);
3452         struct netmap_slot *slot;
3453 #endif /* DEV_NETMAP */
3454
3455         /* Clear the old descriptor contents */
3456         IGB_TX_LOCK(txr);
3457 #ifdef DEV_NETMAP
3458         slot = netmap_reset(na, NR_TX, txr->me, 0);
3459 #endif /* DEV_NETMAP */
3460         bzero((void *)txr->tx_base,
3461               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3462         /* Reset indices */
3463         txr->next_avail_desc = 0;
3464         txr->next_to_clean = 0;
3465
3466         /* Free any existing tx buffers. */
3467         txbuf = txr->tx_buffers;
3468         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3469                 if (txbuf->m_head != NULL) {
3470                         bus_dmamap_sync(txr->txtag, txbuf->map,
3471                             BUS_DMASYNC_POSTWRITE);
3472                         bus_dmamap_unload(txr->txtag, txbuf->map);
3473                         m_freem(txbuf->m_head);
3474                         txbuf->m_head = NULL;
3475                 }
3476 #ifdef DEV_NETMAP
3477                 if (slot) {
3478                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3479                         /* no need to set the address */
3480                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3481                 }
3482 #endif /* DEV_NETMAP */
3483                 /* clear the watch index */
3484                 txbuf->next_eop = -1;
3485         }
3486
3487         /* Set number of descriptors available */
3488         txr->tx_avail = adapter->num_tx_desc;
3489
3490         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3491             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3492         IGB_TX_UNLOCK(txr);
3493 }
3494
3495 /*********************************************************************
3496  *
3497  *  Initialize all transmit rings.
3498  *
3499  **********************************************************************/
3500 static void
3501 igb_setup_transmit_structures(struct adapter *adapter)
3502 {
3503         struct tx_ring *txr = adapter->tx_rings;
3504
3505         for (int i = 0; i < adapter->num_queues; i++, txr++)
3506                 igb_setup_transmit_ring(txr);
3507
3508         return;
3509 }
3510
3511 /*********************************************************************
3512  *
3513  *  Enable transmit unit.
3514  *
3515  **********************************************************************/
3516 static void
3517 igb_initialize_transmit_units(struct adapter *adapter)
3518 {
3519         struct tx_ring  *txr = adapter->tx_rings;
3520         struct e1000_hw *hw = &adapter->hw;
3521         u32             tctl, txdctl;
3522
3523         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3524         tctl = txdctl = 0;
3525
3526         /* Setup the Tx Descriptor Rings */
3527         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3528                 u64 bus_addr = txr->txdma.dma_paddr;
3529
3530                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3531                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3532                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3533                     (uint32_t)(bus_addr >> 32));
3534                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3535                     (uint32_t)bus_addr);
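                     /*
                     ** For example, a ring at bus address 0x123456000
                     ** is split as TDBAH = 0x1 (upper 32 bits) and
                     ** TDBAL = 0x23456000 (lower 32 bits).
                     */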
3536
3537                 /* Setup the HW Tx Head and Tail descriptor pointers */
3538                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3539                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3540
3541                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3542                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3543                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3544
3545                 txr->queue_status = IGB_QUEUE_IDLE;
3546
3547                 txdctl |= IGB_TX_PTHRESH;
3548                 txdctl |= IGB_TX_HTHRESH << 8;
3549                 txdctl |= IGB_TX_WTHRESH << 16;
3550                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3551                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3552         }
3553
3554         if (adapter->vf_ifp)
3555                 return;
3556
3557         e1000_config_collision_dist(hw);
3558
3559         /* Program the Transmit Control Register */
3560         tctl = E1000_READ_REG(hw, E1000_TCTL);
3561         tctl &= ~E1000_TCTL_CT;
3562         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3563                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3564
3565         /* This write will effectively turn on the transmit unit. */
3566         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3567 }
3568
3569 /*********************************************************************
3570  *
3571  *  Free all transmit rings.
3572  *
3573  **********************************************************************/
3574 static void
3575 igb_free_transmit_structures(struct adapter *adapter)
3576 {
3577         struct tx_ring *txr = adapter->tx_rings;
3578
3579         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3580                 IGB_TX_LOCK(txr);
3581                 igb_free_transmit_buffers(txr);
3582                 igb_dma_free(adapter, &txr->txdma);
3583                 IGB_TX_UNLOCK(txr);
3584                 IGB_TX_LOCK_DESTROY(txr);
3585         }
3586         free(adapter->tx_rings, M_DEVBUF);
3587 }
3588
3589 /*********************************************************************
3590  *
3591  *  Free transmit ring related data structures.
3592  *
3593  **********************************************************************/
3594 static void
3595 igb_free_transmit_buffers(struct tx_ring *txr)
3596 {
3597         struct adapter *adapter = txr->adapter;
3598         struct igb_tx_buffer *tx_buffer;
3599         int             i;
3600
3601         INIT_DEBUGOUT("free_transmit_ring: begin");
3602
3603         if (txr->tx_buffers == NULL)
3604                 return;
3605
3606         tx_buffer = txr->tx_buffers;
3607         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3608                 if (tx_buffer->m_head != NULL) {
3609                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3610                             BUS_DMASYNC_POSTWRITE);
3611                         bus_dmamap_unload(txr->txtag,
3612                             tx_buffer->map);
3613                         m_freem(tx_buffer->m_head);
3614                         tx_buffer->m_head = NULL;
3615                         if (tx_buffer->map != NULL) {
3616                                 bus_dmamap_destroy(txr->txtag,
3617                                     tx_buffer->map);
3618                                 tx_buffer->map = NULL;
3619                         }
3620                 } else if (tx_buffer->map != NULL) {
3621                         bus_dmamap_unload(txr->txtag,
3622                             tx_buffer->map);
3623                         bus_dmamap_destroy(txr->txtag,
3624                             tx_buffer->map);
3625                         tx_buffer->map = NULL;
3626                 }
3627         }
3628 #if __FreeBSD_version >= 800000
3629         if (txr->br != NULL)
3630                 buf_ring_free(txr->br, M_DEVBUF);
3631 #endif
3632         if (txr->tx_buffers != NULL) {
3633                 free(txr->tx_buffers, M_DEVBUF);
3634                 txr->tx_buffers = NULL;
3635         }
3636         if (txr->txtag != NULL) {
3637                 bus_dma_tag_destroy(txr->txtag);
3638                 txr->txtag = NULL;
3639         }
3640         return;
3641 }
3642
3643 /**********************************************************************
3644  *
3645  *  Setup work for hardware segmentation offload (TSO)
3646  *
3647  **********************************************************************/
3648 static bool
3649 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3650         struct ip *ip, struct tcphdr *th)
3651 {
3652         struct adapter *adapter = txr->adapter;
3653         struct e1000_adv_tx_context_desc *TXD;
3654         struct igb_tx_buffer        *tx_buffer;
3655         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3656         u32 mss_l4len_idx = 0;
3657         u16 vtag = 0;
3658         int ctxd, ip_hlen, tcp_hlen;
3659
3660         ctxd = txr->next_avail_desc;
3661         tx_buffer = &txr->tx_buffers[ctxd];
3662         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3663
3664         ip->ip_sum = 0;
3665         ip_hlen = ip->ip_hl << 2;
3666         tcp_hlen = th->th_off << 2;
3667
3668         /* VLAN MACLEN IPLEN */
3669         if (mp->m_flags & M_VLANTAG) {
3670                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3671                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3672         }
3673
3674         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3675         vlan_macip_lens |= ip_hlen;
3676         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3677
3678         /* ADV DTYPE TUCMD */
3679         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3680         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3681         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3682         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3683
3684         /* MSS L4LEN IDX */
3685         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3686         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3687         /* 82575 needs the queue index added */
3688         if (adapter->hw.mac.type == e1000_82575)
3689                 mss_l4len_idx |= txr->me << 4;
3690         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
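             /*
             ** For example, a 1448-byte MSS with a 20-byte TCP header
             ** yields mss_l4len_idx = (1448 << 16) | (20 << 8),
             ** assuming the usual advanced-descriptor field layout
             ** (MSS in bits 31:16, L4LEN in bits 15:8).
             */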
3691
3692         TXD->seqnum_seed = htole32(0);
3693         tx_buffer->m_head = NULL;
3694         tx_buffer->next_eop = -1;
3695
3696         if (++ctxd == adapter->num_tx_desc)
3697                 ctxd = 0;
3698
3699         txr->tx_avail--;
3700         txr->next_avail_desc = ctxd;
3701         return TRUE;
3702 }
3703
3704
3705 /*********************************************************************
3706  *
3707  *  Context Descriptor setup for VLAN or CSUM
3708  *
3709  **********************************************************************/
3710
3711 static bool
3712 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3713 {
3714         struct adapter *adapter = txr->adapter;
3715         struct e1000_adv_tx_context_desc *TXD;
3716         struct igb_tx_buffer        *tx_buffer;
3717         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3718         struct ether_vlan_header *eh;
3719         struct ip *ip = NULL;
3720         struct ip6_hdr *ip6;
3721         int  ehdrlen, ctxd, ip_hlen = 0;
3722         u16     etype, vtag = 0;
3723         u8      ipproto = 0;
3724         bool    offload = TRUE;
3725
3726         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3727                 offload = FALSE;
3728
3729         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3730         ctxd = txr->next_avail_desc;
3731         tx_buffer = &txr->tx_buffers[ctxd];
3732         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3733
3734         /*
3735         ** In advanced descriptors the vlan tag must
3736         ** be placed into the context descriptor; thus
3737         ** we may be here just for that setup.
3738         */
3739         if (mp->m_flags & M_VLANTAG) {
3740                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3741                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3742         } else if (offload == FALSE)
3743                 return FALSE;
3744
3745         /*
3746          * Determine where frame payload starts.
3747          * Jump over vlan headers if already present,
3748          * helpful for QinQ too.
3749          */
3750         eh = mtod(mp, struct ether_vlan_header *);
3751         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3752                 etype = ntohs(eh->evl_proto);
3753                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3754         } else {
3755                 etype = ntohs(eh->evl_encap_proto);
3756                 ehdrlen = ETHER_HDR_LEN;
3757         }
3758
3759         /* Set the ether header length */
3760         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3761
3762         switch (etype) {
3763                 case ETHERTYPE_IP:
3764                         ip = (struct ip *)(mp->m_data + ehdrlen);
3765                         ip_hlen = ip->ip_hl << 2;
3766                         if (mp->m_len < ehdrlen + ip_hlen) {
3767                                 offload = FALSE;
3768                                 break;
3769                         }
3770                         ipproto = ip->ip_p;
3771                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3772                         break;
3773                 case ETHERTYPE_IPV6:
3774                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3775                         ip_hlen = sizeof(struct ip6_hdr);
3776                         ipproto = ip6->ip6_nxt;
3777                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3778                         break;
3779                 default:
3780                         offload = FALSE;
3781                         break;
3782         }
3783
3784         vlan_macip_lens |= ip_hlen;
3785         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3786
3787         switch (ipproto) {
3788                 case IPPROTO_TCP:
3789                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3790                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3791                         break;
3792                 case IPPROTO_UDP:
3793                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3794                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3795                         break;
3796 #if __FreeBSD_version >= 800000
3797                 case IPPROTO_SCTP:
3798                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3799                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3800                         break;
3801 #endif
3802                 default:
3803                         offload = FALSE;
3804                         break;
3805         }
3806
3807         /* 82575 needs the queue index added */
3808         if (adapter->hw.mac.type == e1000_82575)
3809                 mss_l4len_idx = txr->me << 4;
3810
3811         /* Now copy bits into descriptor */
3812         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3813         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3814         TXD->seqnum_seed = htole32(0);
3815         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3816
3817         tx_buffer->m_head = NULL;
3818         tx_buffer->next_eop = -1;
3819
3820         /* We've consumed the first desc, adjust counters */
3821         if (++ctxd == adapter->num_tx_desc)
3822                 ctxd = 0;
3823         txr->next_avail_desc = ctxd;
3824         --txr->tx_avail;
3825
3826         return (offload);
3827 }
3828
3829
3830 /**********************************************************************
3831  *
3832  *  Examine each tx_buffer in the used queue. If the hardware is done
3833  *  processing the packet then free associated resources. The
3834  *  tx_buffer is put back on the free queue.
3835  *
3836  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3837  **********************************************************************/
3838 static bool
3839 igb_txeof(struct tx_ring *txr)
3840 {
3841         struct adapter  *adapter = txr->adapter;
3842         int first, last, done, processed;
3843         struct igb_tx_buffer *tx_buffer;
3844         struct e1000_tx_desc   *tx_desc, *eop_desc;
3845         struct ifnet   *ifp = adapter->ifp;
3846
3847         IGB_TX_LOCK_ASSERT(txr);
3848
3849 #ifdef DEV_NETMAP
3850         if (ifp->if_capenable & IFCAP_NETMAP) {
3851                 struct netmap_adapter *na = NA(ifp);
3852
3853                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3854                 IGB_TX_UNLOCK(txr);
3855                 IGB_CORE_LOCK(adapter);
3856                 selwakeuppri(&na->tx_si, PI_NET);
3857                 IGB_CORE_UNLOCK(adapter);
3858                 IGB_TX_LOCK(txr);
3859                 return FALSE;
3860         }
3861 #endif /* DEV_NETMAP */
3862         if (txr->tx_avail == adapter->num_tx_desc) {
3863                 txr->queue_status = IGB_QUEUE_IDLE;
3864                 return FALSE;
3865         }
3866
3867         processed = 0;
3868         first = txr->next_to_clean;
3869         tx_desc = &txr->tx_base[first];
3870         tx_buffer = &txr->tx_buffers[first];
3871         last = tx_buffer->next_eop;
3872         eop_desc = &txr->tx_base[last];
3873
3874         /*
3875          * Get the index of the first descriptor AFTER
3876          * the EOP of the first packet, so that we can do
3877          * the simple comparison in the inner while loop.
3878          */
3880         if (++last == adapter->num_tx_desc)
3881                 last = 0;
3882         done = last;
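             /*
             ** For example, with first = 1020 and an EOP at 1022 in a
             ** 1024-descriptor ring: done becomes 1023, and the inner
             ** loop below cleans descriptors 1020 through 1022.
             */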
3883
3884         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3885             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3886
3887         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3888                 /* We clean the range of the packet */
3889                 while (first != done) {
3890                         tx_desc->upper.data = 0;
3891                         tx_desc->lower.data = 0;
3892                         tx_desc->buffer_addr = 0;
3893                         ++txr->tx_avail;
3894                         ++processed;
3895
3896                         if (tx_buffer->m_head) {
3897                                 txr->bytes +=
3898                                     tx_buffer->m_head->m_pkthdr.len;
3899                                 bus_dmamap_sync(txr->txtag,
3900                                     tx_buffer->map,
3901                                     BUS_DMASYNC_POSTWRITE);
3902                                 bus_dmamap_unload(txr->txtag,
3903                                     tx_buffer->map);
3904
3905                                 m_freem(tx_buffer->m_head);
3906                                 tx_buffer->m_head = NULL;
3907                         }
3908                         tx_buffer->next_eop = -1;
3909                         txr->watchdog_time = ticks;
3910
3911                         if (++first == adapter->num_tx_desc)
3912                                 first = 0;
3913
3914                         tx_buffer = &txr->tx_buffers[first];
3915                         tx_desc = &txr->tx_base[first];
3916                 }
3917                 ++txr->packets;
3918                 ++ifp->if_opackets;
3919                 /* See if we can continue to the next packet */
3920                 last = tx_buffer->next_eop;
3921                 if (last != -1) {
3922                         eop_desc = &txr->tx_base[last];
3923                         /* Get new done point */
3924                         if (++last == adapter->num_tx_desc) last = 0;
3925                         done = last;
3926                 } else
3927                         break;
3928         }
3929         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3930             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3931
3932         txr->next_to_clean = first;
3933
3934         /*
3935         ** Watchdog calculation: we know there's work
3936         ** outstanding or the first return above would
3937         ** have been taken, so nothing processed for too
3938         ** long indicates a hang.
3939         */
3940         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3941                 txr->queue_status |= IGB_QUEUE_HUNG;
3942         /*
3943          * If we have a minimum number of free
3944          * descriptors, clear the depleted state bit.
3945          */
3946         if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3947                 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3948
3949         /* All clean, turn off the watchdog */
3950         if (txr->tx_avail == adapter->num_tx_desc) {
3951                 txr->queue_status = IGB_QUEUE_IDLE;
3952                 return (FALSE);
3953         }
3954
3955         return (TRUE);
3956 }
3957
3958 /*********************************************************************
3959  *
3960  *  Refresh mbuf buffers for RX descriptor rings
3961  *   - now keeps its own state so discards due to resource
3962  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3963  *     it just returns, keeping its placeholder, so it can
3964  *     simply be called again later to retry.
3965  *
3966  **********************************************************************/
3967 static void
3968 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3969 {
3970         struct adapter          *adapter = rxr->adapter;
3971         bus_dma_segment_t       hseg[1];
3972         bus_dma_segment_t       pseg[1];
3973         struct igb_rx_buf       *rxbuf;
3974         struct mbuf             *mh, *mp;
3975         int                     i, j, nsegs, error;
3976         bool                    refreshed = FALSE;
3977
3978         i = j = rxr->next_to_refresh;
3979         /*
3980         ** Get one descriptor beyond
3981         ** our work mark to control
3982         ** the loop.
3983         */
3984         if (++j == adapter->num_rx_desc)
3985                 j = 0;
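             /*
             ** For example, with next_to_refresh = 254 in a
             ** 256-descriptor ring: i starts at 254 and j at 255; the
             ** loop refreshes buffers and advances both until j
             ** reaches the caller's limit.
             */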
3986
3987         while (j != limit) {
3988                 rxbuf = &rxr->rx_buffers[i];
3989                 /* No hdr mbuf is used when header split is off */
3990                 if (rxr->hdr_split == FALSE)
3991                         goto no_split;
3992                 if (rxbuf->m_head == NULL) {
3993                         mh = m_gethdr(M_DONTWAIT, MT_DATA);
3994                         if (mh == NULL)
3995                                 goto update;
3996                 } else
3997                         mh = rxbuf->m_head;
3998
3999                 mh->m_pkthdr.len = mh->m_len = MHLEN;
4001                 mh->m_flags |= M_PKTHDR;
4002                 /* Get the memory mapping */
4003                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4004                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4005                 if (error != 0) {
4006                         printf("Refresh mbufs: hdr dmamap load"
4007                             " failure - %d\n", error);
4008                         m_free(mh);
4009                         rxbuf->m_head = NULL;
4010                         goto update;
4011                 }
4012                 rxbuf->m_head = mh;
4013                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4014                     BUS_DMASYNC_PREREAD);
4015                 rxr->rx_base[i].read.hdr_addr =
4016                     htole64(hseg[0].ds_addr);
4017 no_split:
4018                 if (rxbuf->m_pack == NULL) {
4019                         mp = m_getjcl(M_DONTWAIT, MT_DATA,
4020                             M_PKTHDR, adapter->rx_mbuf_sz);
4021                         if (mp == NULL)
4022                                 goto update;
4023                 } else
4024                         mp = rxbuf->m_pack;
4025
4026                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4027                 /* Get the memory mapping */
4028                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4029                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4030                 if (error != 0) {
4031                         printf("Refresh mbufs: payload dmamap load"
4032                             " failure - %d\n", error);
4033                         m_free(mp);
4034                         rxbuf->m_pack = NULL;
4035                         goto update;
4036                 }
4037                 rxbuf->m_pack = mp;
4038                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4039                     BUS_DMASYNC_PREREAD);
4040                 rxr->rx_base[i].read.pkt_addr =
4041                     htole64(pseg[0].ds_addr);
4042                 refreshed = TRUE; /* I feel wefreshed :) */
4043
4044                 i = j; /* our next is precalculated */
4045                 rxr->next_to_refresh = i;
4046                 if (++j == adapter->num_rx_desc)
4047                         j = 0;
4048         }
4049 update:
4050         if (refreshed) /* update tail */
4051                 E1000_WRITE_REG(&adapter->hw,
4052                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4053         return;
4054 }
4055
4056
4057 /*********************************************************************
4058  *
4059  *  Allocate memory for rx_buffer structures. Since we use one
4060  *  rx_buffer per received packet, the maximum number of rx_buffer's
4061  *  rx_buffer per received packet, the maximum number of rx_buffers
4062  *  that we've allocated.
4063  *
4064  **********************************************************************/
4065 static int
4066 igb_allocate_receive_buffers(struct rx_ring *rxr)
4067 {
4068         struct  adapter         *adapter = rxr->adapter;
4069         device_t                dev = adapter->dev;
4070         struct igb_rx_buf       *rxbuf;
4071         int                     i, bsize, error;
4072
4073         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4074         if (!(rxr->rx_buffers =
4075             (struct igb_rx_buf *) malloc(bsize,
4076             M_DEVBUF, M_NOWAIT | M_ZERO))) {
4077                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4078                 error = ENOMEM;
4079                 goto fail;
4080         }
4081
4082         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4083                                    1, 0,                /* alignment, bounds */
4084                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4085                                    BUS_SPACE_MAXADDR,   /* highaddr */
4086                                    NULL, NULL,          /* filter, filterarg */
4087                                    MSIZE,               /* maxsize */
4088                                    1,                   /* nsegments */
4089                                    MSIZE,               /* maxsegsize */
4090                                    0,                   /* flags */
4091                                    NULL,                /* lockfunc */
4092                                    NULL,                /* lockfuncarg */
4093                                    &rxr->htag))) {
4094                 device_printf(dev, "Unable to create RX DMA tag\n");
4095                 goto fail;
4096         }
4097
4098         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4099                                    1, 0,                /* alignment, bounds */
4100                                    BUS_SPACE_MAXADDR,   /* lowaddr */
4101                                    BUS_SPACE_MAXADDR,   /* highaddr */
4102                                    NULL, NULL,          /* filter, filterarg */
4103                                    MJUM9BYTES,          /* maxsize */
4104                                    1,                   /* nsegments */
4105                                    MJUM9BYTES,          /* maxsegsize */
4106                                    0,                   /* flags */
4107                                    NULL,                /* lockfunc */
4108                                    NULL,                /* lockfuncarg */
4109                                    &rxr->ptag))) {
4110                 device_printf(dev, "Unable to create RX payload DMA tag\n");
4111                 goto fail;
4112         }
4113
4114         for (i = 0; i < adapter->num_rx_desc; i++) {
4115                 rxbuf = &rxr->rx_buffers[i];
4116                 error = bus_dmamap_create(rxr->htag,
4117                     BUS_DMA_NOWAIT, &rxbuf->hmap);
4118                 if (error) {
4119                         device_printf(dev,
4120                             "Unable to create RX head DMA maps\n");
4121                         goto fail;
4122                 }
4123                 error = bus_dmamap_create(rxr->ptag,
4124                     BUS_DMA_NOWAIT, &rxbuf->pmap);
4125                 if (error) {
4126                         device_printf(dev,
4127                             "Unable to create RX packet DMA maps\n");
4128                         goto fail;
4129                 }
4130         }
4131
4132         return (0);
4133
4134 fail:
4135         /* Frees all, but can handle partial completion */
4136         igb_free_receive_structures(adapter);
4137         return (error);
4138 }
4139
4140
4141 static void
4142 igb_free_receive_ring(struct rx_ring *rxr)
4143 {
4144         struct  adapter         *adapter = rxr->adapter;
4145         struct igb_rx_buf       *rxbuf;
4146
4147
4148         for (int i = 0; i < adapter->num_rx_desc; i++) {
4149                 rxbuf = &rxr->rx_buffers[i];
4150                 if (rxbuf->m_head != NULL) {
4151                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4152                             BUS_DMASYNC_POSTREAD);
4153                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4154                         rxbuf->m_head->m_flags |= M_PKTHDR;
4155                         m_freem(rxbuf->m_head);
4156                 }
4157                 if (rxbuf->m_pack != NULL) {
4158                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4159                             BUS_DMASYNC_POSTREAD);
4160                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4161                         rxbuf->m_pack->m_flags |= M_PKTHDR;
4162                         m_freem(rxbuf->m_pack);
4163                 }
4164                 rxbuf->m_head = NULL;
4165                 rxbuf->m_pack = NULL;
4166         }
4167 }
4168
4169
4170 /*********************************************************************
4171  *
4172  *  Initialize a receive ring and its buffers.
4173  *
4174  **********************************************************************/
4175 static int
4176 igb_setup_receive_ring(struct rx_ring *rxr)
4177 {
4178         struct  adapter         *adapter;
4179         struct  ifnet           *ifp;
4180         device_t                dev;
4181         struct igb_rx_buf       *rxbuf;
4182         bus_dma_segment_t       pseg[1], hseg[1];
4183         struct lro_ctrl         *lro = &rxr->lro;
4184         int                     rsize, nsegs, error = 0;
4185 #ifdef DEV_NETMAP
4186         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4187         struct netmap_slot *slot;
4188 #endif /* DEV_NETMAP */
4189
4190         adapter = rxr->adapter;
4191         dev = adapter->dev;
4192         ifp = adapter->ifp;
4193
4194         /* Clear the ring contents */
4195         IGB_RX_LOCK(rxr);
4196 #ifdef DEV_NETMAP
4197         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4198 #endif /* DEV_NETMAP */
4199         rsize = roundup2(adapter->num_rx_desc *
4200             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4201         bzero((void *)rxr->rx_base, rsize);
4202
4203         /*
4204         ** Free current RX buffer structures and their mbufs
4205         */
4206         igb_free_receive_ring(rxr);
4207
4208         /* Configure for header split? */
4209         if (igb_header_split)
4210                 rxr->hdr_split = TRUE;
4211
4212         /* Now replenish the ring mbufs */
4213         for (int j = 0; j < adapter->num_rx_desc; ++j) {
4214                 struct mbuf     *mh, *mp;
4215
4216                 rxbuf = &rxr->rx_buffers[j];
4217 #ifdef DEV_NETMAP
4218                 if (slot) {
4219                         /* slot sj is mapped to the j-th NIC-ring entry */
4220                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4221                         uint64_t paddr;
4222                         void *addr;
4223
4224                         addr = PNMB(slot + sj, &paddr);
4225                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4226                         /* Update descriptor */
4227                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4228                         continue;
4229                 }
4230 #endif /* DEV_NETMAP */
4231                 if (rxr->hdr_split == FALSE)
4232                         goto skip_head;
4233
4234                 /* First the header */
4235                 rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4236                 if (rxbuf->m_head == NULL) {
4237                         error = ENOBUFS;
4238                         goto fail;
4239                 }
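                     /* ETHER_ALIGN (2) offsets the data so the IP header */
                     /* after the 14-byte Ethernet header is 4-byte aligned */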
4240                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4241                 mh = rxbuf->m_head;
4242                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4243                 mh->m_flags |= M_PKTHDR;
4244                 /* Get the memory mapping */
4245                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4246                     rxbuf->hmap, rxbuf->m_head, hseg,
4247                     &nsegs, BUS_DMA_NOWAIT);
4248                 if (error != 0) /* Nothing elegant to do here */
4249                         goto fail;
4250                 bus_dmamap_sync(rxr->htag,
4251                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4252                 /* Update descriptor */
4253                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4254
4255 skip_head:
4256                 /* Now the payload cluster */
4257                 rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4258                     M_PKTHDR, adapter->rx_mbuf_sz);
4259                 if (rxbuf->m_pack == NULL) {
4260                         error = ENOBUFS;
4261                         goto fail;
4262                 }
4263                 mp = rxbuf->m_pack;
4264                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4265                 /* Get the memory mapping */
4266                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4267                     rxbuf->pmap, mp, pseg,
4268                     &nsegs, BUS_DMA_NOWAIT);
4269                 if (error != 0)
4270                         goto fail;
4271                 bus_dmamap_sync(rxr->ptag,
4272                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4273                 /* Update descriptor */
4274                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4275         }
4276
4277         /* Setup our descriptor indices */
4278         rxr->next_to_check = 0;
4279         rxr->next_to_refresh = adapter->num_rx_desc - 1;
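             /* next_to_refresh is the last descriptor handed back to hw; */
             /* starting at num_rx_desc - 1 means the whole ring is fresh. */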
4280         rxr->lro_enabled = FALSE;
4281         rxr->rx_split_packets = 0;
4282         rxr->rx_bytes = 0;
4283
4284         rxr->fmp = NULL;
4285         rxr->lmp = NULL;
4286         rxr->discard = FALSE;
4287
4288         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4289             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4290
4291         /*
4292         ** Now set up the LRO interface.  We also
4293         ** only do header split when LRO is
4294         ** enabled, since it is so often
4295         ** undesirable in other setups.
4296         */
4297         if (ifp->if_capenable & IFCAP_LRO) {
4298                 error = tcp_lro_init(lro);
4299                 if (error) {
4300                         device_printf(dev, "LRO Initialization failed!\n");
4301                         goto fail;
4302                 }
4303                 INIT_DEBUGOUT("RX LRO Initialized\n");
4304                 rxr->lro_enabled = TRUE;
4305                 lro->ifp = adapter->ifp;
4306         }
4307
4308         IGB_RX_UNLOCK(rxr);
4309         return (0);
4310
4311 fail:
4312         igb_free_receive_ring(rxr);
4313         IGB_RX_UNLOCK(rxr);
4314         return (error);
4315 }
4316
4317
4318 /*********************************************************************
4319  *
4320  *  Initialize all receive rings.
4321  *
4322  **********************************************************************/
4323 static int
4324 igb_setup_receive_structures(struct adapter *adapter)
4325 {
4326         struct rx_ring *rxr = adapter->rx_rings;
4327         int i;
4328
4329         for (i = 0; i < adapter->num_queues; i++, rxr++)
4330                 if (igb_setup_receive_ring(rxr))
4331                         goto fail;
4332
4333         return (0);
4334 fail:
4335         /*
4336          * Free RX buffers allocated so far, we will only handle
4337          * the rings that completed, the failing case will have
4338          * cleaned up for itself. 'i' is the endpoint.
4339          */
4340         for (int j = 0; j < i; ++j) {
4341                 rxr = &adapter->rx_rings[j];
4342                 IGB_RX_LOCK(rxr);
4343                 igb_free_receive_ring(rxr);
4344                 IGB_RX_UNLOCK(rxr);
4345         }
4346
4347         return (ENOBUFS);
4348 }
4349
4350 /*********************************************************************
4351  *
4352  *  Enable receive unit.
4353  *
4354  **********************************************************************/
4355 static void
4356 igb_initialize_receive_units(struct adapter *adapter)
4357 {
4358         struct rx_ring  *rxr = adapter->rx_rings;
4359         struct ifnet    *ifp = adapter->ifp;
4360         struct e1000_hw *hw = &adapter->hw;
4361         u32             rctl, rxcsum, psize, srrctl = 0;
4362
4363         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4364
4365         /*
4366          * Make sure receives are disabled while setting
4367          * up the descriptor ring
4368          */
4369         rctl = E1000_READ_REG(hw, E1000_RCTL);
4370         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4371
4372         /*
4373         ** Set up for header split
4374         */
4375         if (igb_header_split) {
4376                 /* Use a standard mbuf for the header */
4377                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4378                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4379         } else
4380                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4381
4382         /*
4383         ** Set up for jumbo frames
4384         */
4385         if (ifp->if_mtu > ETHERMTU) {
4386                 rctl |= E1000_RCTL_LPE;
4387                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4388                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4389                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4390                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4391                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4392                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4393                 }
4394                 /* Set maximum packet len */
4395                 psize = adapter->max_frame_size;
4396                 /* are we on a vlan? */
4397                 if (adapter->ifp->if_vlantrunk != NULL)
4398                         psize += VLAN_TAG_SIZE;
4399                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4400         } else {
4401                 rctl &= ~E1000_RCTL_LPE;
4402                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4403                 rctl |= E1000_RCTL_SZ_2048;
4404         }
4405
4406         /* Setup the Base and Length of the Rx Descriptor Rings */
4407         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4408                 u64 bus_addr = rxr->rxdma.dma_paddr;
4409                 u32 rxdctl;
4410
4411                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4412                     adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc));
4413                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4414                     (uint32_t)(bus_addr >> 32));
4415                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4416                     (uint32_t)bus_addr);
4417                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4418                 /* Enable this Queue */
4419                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4420                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4421                 rxdctl &= 0xFFF00000;
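                     /* clear the low 20 bits (threshold fields); QUEUE_ENABLE survives */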
4422                 rxdctl |= IGB_RX_PTHRESH;
4423                 rxdctl |= IGB_RX_HTHRESH << 8;
4424                 rxdctl |= IGB_RX_WTHRESH << 16;
4425                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4426         }
4427
4428         /*
4429         ** Setup for RX MultiQueue
4430         */
4431         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4432         if (adapter->num_queues > 1) {
4433                 u32 random[10], mrqc, shift = 0;
4434                 union igb_reta {
4435                         u32 dword;
4436                         u8  bytes[4];
4437                 } reta;
4438
4439                 arc4rand(&random, sizeof(random), 0);
4440                 if (adapter->hw.mac.type == e1000_82575)
4441                         shift = 6;
4442                 /* Populate the 128-entry RETA round-robin over the queues */
4443                 for (int i = 0; i < 128; i++) {
4444                         reta.bytes[i & 3] =
4445                             (i % adapter->num_queues) << shift;
4446                         if ((i & 3) == 3)
4447                                 E1000_WRITE_REG(hw,
4448                                     E1000_RETA(i >> 2), reta.dword);
4449                 }
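                     /*
                     ** Example: with two queues (and no 82575 shift) each
                     ** dword written above is 0x01000100 on little-endian
                     ** hosts, i.e. queues 0,1,0,1 in table order.
                     */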
4450                 /* Now fill in hash table */
4451                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4452                 for (int i = 0; i < 10; i++)
4453                         E1000_WRITE_REG_ARRAY(hw,
4454                             E1000_RSSRK(0), i, random[i]);
4455
4456                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4457                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4458                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4459                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4460                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4461                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4462                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4463                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4464
4465                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4466
4467                 /*
4468                 ** NOTE: Receive Full-Packet Checksum Offload 
4469                 ** is mutually exclusive with Multiqueue. However
4470                 ** this is not the same as TCP/IP checksums which
4471                 ** still work.
4472                 */
4473                 rxcsum |= E1000_RXCSUM_PCSD;
4474 #if __FreeBSD_version >= 800000
4475                 /* For SCTP Offload */
4476                 if ((hw->mac.type == e1000_82576)
4477                     && (ifp->if_capenable & IFCAP_RXCSUM))
4478                         rxcsum |= E1000_RXCSUM_CRCOFL;
4479 #endif
4480         } else {
4481                 /* Non RSS setup */
4482                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4483                         rxcsum |= E1000_RXCSUM_IPPCSE;
4484 #if __FreeBSD_version >= 800000
4485                         if (adapter->hw.mac.type == e1000_82576)
4486                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4487 #endif
4488                 } else
4489                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4490         }
4491         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4492
4493         /* Setup the Receive Control Register */
4494         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4495         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4496                    E1000_RCTL_RDMTS_HALF |
4497                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4498         /* Strip CRC bytes. */
4499         rctl |= E1000_RCTL_SECRC;
4500         /* Make sure VLAN Filters are off */
4501         rctl &= ~E1000_RCTL_VFE;
4502         /* Don't store bad packets */
4503         rctl &= ~E1000_RCTL_SBP;
4504
4505         /* Enable Receives */
4506         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4507
4508         /*
4509          * Setup the HW Rx Head and Tail Descriptor Pointers
4510          *   - needs to be after enable
4511          */
4512         for (int i = 0; i < adapter->num_queues; i++) {
4513                 rxr = &adapter->rx_rings[i];
4514                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4515 #ifdef DEV_NETMAP
4516                 /*
4517                  * an init() while a netmap client is active must
4518                  * preserve the rx buffers passed to userspace.
4519                  * In this driver it means we adjust RDT to
4520                  * something different from next_to_refresh
4521                  * (which is not used in netmap mode).
4522                  */
4523                 if (ifp->if_capenable & IFCAP_NETMAP) {
4524                         struct netmap_adapter *na = NA(adapter->ifp);
4525                         struct netmap_kring *kring = &na->rx_rings[i];
4526                         int t = rxr->next_to_refresh - kring->nr_hwavail;
4527
4528                         if (t >= adapter->num_rx_desc)
4529                                 t -= adapter->num_rx_desc;
4530                         else if (t < 0)
4531                                 t += adapter->num_rx_desc;
4532                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4533                 } else
4534 #endif /* DEV_NETMAP */
4535                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4536         }
4537         return;
4538 }
4539
4540 /*********************************************************************
4541  *
4542  *  Free receive rings.
4543  *
4544  **********************************************************************/
4545 static void
4546 igb_free_receive_structures(struct adapter *adapter)
4547 {
4548         struct rx_ring *rxr = adapter->rx_rings;
4549
4550         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4551                 struct lro_ctrl *lro = &rxr->lro;
4552                 igb_free_receive_buffers(rxr);
4553                 tcp_lro_free(lro);
4554                 igb_dma_free(adapter, &rxr->rxdma);
4555         }
4556
4557         free(adapter->rx_rings, M_DEVBUF);
4558 }
4559
4560 /*********************************************************************
4561  *
4562  *  Free receive ring data structures.
4563  *
4564  **********************************************************************/
4565 static void
4566 igb_free_receive_buffers(struct rx_ring *rxr)
4567 {
4568         struct adapter          *adapter = rxr->adapter;
4569         struct igb_rx_buf       *rxbuf;
4570         int i;
4571
4572         INIT_DEBUGOUT("free_receive_structures: begin");
4573
4574         /* Cleanup any existing buffers */
4575         if (rxr->rx_buffers != NULL) {
4576                 for (i = 0; i < adapter->num_rx_desc; i++) {
4577                         rxbuf = &rxr->rx_buffers[i];
4578                         if (rxbuf->m_head != NULL) {
4579                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4580                                     BUS_DMASYNC_POSTREAD);
4581                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4582                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4583                                 m_freem(rxbuf->m_head);
4584                         }
4585                         if (rxbuf->m_pack != NULL) {
4586                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4587                                     BUS_DMASYNC_POSTREAD);
4588                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4589                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4590                                 m_freem(rxbuf->m_pack);
4591                         }
4592                         rxbuf->m_head = NULL;
4593                         rxbuf->m_pack = NULL;
4594                         if (rxbuf->hmap != NULL) {
4595                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4596                                 rxbuf->hmap = NULL;
4597                         }
4598                         if (rxbuf->pmap != NULL) {
4599                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4600                                 rxbuf->pmap = NULL;
4601                         }
4602                 }
4603                 if (rxr->rx_buffers != NULL) {
4604                         free(rxr->rx_buffers, M_DEVBUF);
4605                         rxr->rx_buffers = NULL;
4606                 }
4607         }
4608
4609         if (rxr->htag != NULL) {
4610                 bus_dma_tag_destroy(rxr->htag);
4611                 rxr->htag = NULL;
4612         }
4613         if (rxr->ptag != NULL) {
4614                 bus_dma_tag_destroy(rxr->ptag);
4615                 rxr->ptag = NULL;
4616         }
4617 }
4618
4619 static __inline void
4620 igb_rx_discard(struct rx_ring *rxr, int i)
4621 {
4622         struct igb_rx_buf       *rbuf;
4623
4624         rbuf = &rxr->rx_buffers[i];
4625
4626         /* Partially received? Free the chain */
4627         if (rxr->fmp != NULL) {
4628                 rxr->fmp->m_flags |= M_PKTHDR;
4629                 m_freem(rxr->fmp);
4630                 rxr->fmp = NULL;
4631                 rxr->lmp = NULL;
4632         }
4633
4634         /*
4635         ** With advanced descriptors the writeback
4636         ** clobbers the buffer addrs, so it's easier
4637         ** to just free the existing mbufs and take
4638         ** the normal refresh path to get new buffers
4639         ** and mapping.
4640         */
4641         if (rbuf->m_head) {
4642                 m_free(rbuf->m_head);
4643                 rbuf->m_head = NULL;
4644         }
4645
4646         if (rbuf->m_pack) {
4647                 m_free(rbuf->m_pack);
4648                 rbuf->m_pack = NULL;
4649         }
4650
4651         return;
4652 }
4653
4654 static __inline void
4655 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4656 {
4657
4658         /*
4659          * At the moment LRO is applied only to IPv4/TCP packets whose
4660          * TCP checksum was verified by hardware and which carry no
4661          * VLAN tag in the Ethernet header.
4662          */
4663         if (rxr->lro_enabled &&
4664             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4665             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4666             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4667             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4668             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4669             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4670                 /*
4671                  * Try the LRO enqueue; fall through
4672                  * and send to the stack if there are
4673                  * no LRO resources left or the
4674                  * enqueue fails.
4675                  */
4676                 if (rxr->lro.lro_cnt != 0)
4677                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4678                                 return;
4679         }
4680         IGB_RX_UNLOCK(rxr);
4681         (*ifp->if_input)(ifp, m);
4682         IGB_RX_LOCK(rxr);
4683 }
4684
4685 /*********************************************************************
4686  *
4687  *  This routine executes in interrupt context. It replenishes
4688  *  the mbufs in the descriptor and sends data which has been
4689  *  DMA'd into host memory to the upper layer.
4690  *
4691  *  We loop at most count times if count is > 0, or until done if
4692  *  count < 0.
4693  *
4694  *  Return TRUE if more to clean, FALSE otherwise
4695  *********************************************************************/
4696 static bool
4697 igb_rxeof(struct igb_queue *que, int count, int *done)
4698 {
4699         struct adapter          *adapter = que->adapter;
4700         struct rx_ring          *rxr = que->rxr;
4701         struct ifnet            *ifp = adapter->ifp;
4702         struct lro_ctrl         *lro = &rxr->lro;
4703         struct lro_entry        *queued;
4704         int                     i, processed = 0, rxdone = 0;
4705         u32                     ptype, staterr = 0;
4706         union e1000_adv_rx_desc *cur;
4707
4708         IGB_RX_LOCK(rxr);
4709         /* Sync the ring. */
4710         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4711             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4712
4713 #ifdef DEV_NETMAP
4714         if (ifp->if_capenable & IFCAP_NETMAP) {
4715                 struct netmap_adapter *na = NA(ifp);
4716
4717                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4718                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4719                 IGB_RX_UNLOCK(rxr);
4720                 IGB_CORE_LOCK(adapter);
4721                 selwakeuppri(&na->rx_si, PI_NET);
4722                 IGB_CORE_UNLOCK(adapter);
4723                 return (0);
4724         }
4725 #endif /* DEV_NETMAP */
4726
4727         /* Main clean loop */
4728         for (i = rxr->next_to_check; count != 0;) {
4729                 struct mbuf             *sendmp, *mh, *mp;
4730                 struct igb_rx_buf       *rxbuf;
4731                 u16                     hlen, plen, hdr, vtag;
4732                 bool                    eop = FALSE;
4733  
4734                 cur = &rxr->rx_base[i];
4735                 staterr = le32toh(cur->wb.upper.status_error);
4736                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4737                         break;
4738                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4739                         break;
4740                 count--;
4741                 sendmp = mh = mp = NULL;
4742                 cur->wb.upper.status_error = 0;
4743                 rxbuf = &rxr->rx_buffers[i];
4744                 plen = le16toh(cur->wb.upper.length);
4745                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4746                 if ((adapter->hw.mac.type == e1000_i350) &&
4747                     (staterr & E1000_RXDEXT_STATERR_LB))
4748                         vtag = be16toh(cur->wb.upper.vlan);
4749                 else
4750                         vtag = le16toh(cur->wb.upper.vlan);
4751                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4752                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4753
4754                 /* Make sure all segments of a bad packet are discarded */
4755                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4756                     (rxr->discard)) {
4757                         adapter->dropped_pkts++;
4758                         ++rxr->rx_discarded;
4759                         if (!eop) /* Catch subsequent segs */
4760                                 rxr->discard = TRUE;
4761                         else
4762                                 rxr->discard = FALSE;
4763                         igb_rx_discard(rxr, i);
4764                         goto next_desc;
4765                 }
4766
4767                 /*
4768                 ** The way the hardware is configured to
4769                 ** split, it will ONLY use the header buffer
4770                 ** when header split is enabled, otherwise we
4771                 ** get normal behavior, i.e., both header and
4772                 ** payload are DMA'd into the payload buffer.
4773                 **
4774                 ** The fmp test is to catch the case where a
4775                 ** packet spans multiple descriptors, in that
4776                 ** case only the first header is valid.
4777                 */
4778                 if (rxr->hdr_split && rxr->fmp == NULL) {
4779                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4780                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4781                         if (hlen > IGB_HDR_BUF)
4782                                 hlen = IGB_HDR_BUF;
4783                         mh = rxr->rx_buffers[i].m_head;
4784                         mh->m_len = hlen;
4785                         /* clear buf pointer for refresh */
4786                         rxbuf->m_head = NULL;
4787                         /*
4788                         ** Get the payload length, this
4789                         ** could be zero if it's a small
4790                         ** packet.
4791                         */
4792                         if (plen > 0) {
4793                                 mp = rxr->rx_buffers[i].m_pack;
4794                                 mp->m_len = plen;
4795                                 mh->m_next = mp;
4796                                 /* clear buf pointer */
4797                                 rxbuf->m_pack = NULL;
4798                                 rxr->rx_split_packets++;
4799                         }
4800                 } else {
4801                         /*
4802                         ** Either no header split, or a
4803                         ** secondary piece of a fragmented
4804                         ** split packet.
4805                         */
4806                         mh = rxr->rx_buffers[i].m_pack;
4807                         mh->m_len = plen;
4808                         /* clear buf info for refresh */
4809                         rxbuf->m_pack = NULL;
4810                 }
4811
4812                 ++processed; /* So we know when to refresh */
4813
4814                 /* Initial frame - setup */
4815                 if (rxr->fmp == NULL) {
4816                         mh->m_pkthdr.len = mh->m_len;
4817                         /* Save the head of the chain */
4818                         rxr->fmp = mh;
4819                         rxr->lmp = mh;
4820                         if (mp != NULL) {
4821                                 /* Add payload if split */
4822                                 mh->m_pkthdr.len += mp->m_len;
4823                                 rxr->lmp = mh->m_next;
4824                         }
4825                 } else {
4826                         /* Chain mbuf's together */
4827                         rxr->lmp->m_next = mh;
4828                         rxr->lmp = rxr->lmp->m_next;
4829                         rxr->fmp->m_pkthdr.len += mh->m_len;
4830                 }
4831
4832                 if (eop) {
4833                         rxr->fmp->m_pkthdr.rcvif = ifp;
4834                         ifp->if_ipackets++;
4835                         rxr->rx_packets++;
4836                         /* capture data for AIM */
4837                         rxr->packets++;
4838                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4839                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4840
4841                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4842                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4843
4844                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4845                             (staterr & E1000_RXD_STAT_VP) != 0) {
4846                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4847                                 rxr->fmp->m_flags |= M_VLANTAG;
4848                         }
4849 #if __FreeBSD_version >= 800000
4850                         rxr->fmp->m_pkthdr.flowid = que->msix;
4851                         rxr->fmp->m_flags |= M_FLOWID;
4852 #endif
4853                         sendmp = rxr->fmp;
4854                         /* Make sure to set M_PKTHDR. */
4855                         sendmp->m_flags |= M_PKTHDR;
4856                         rxr->fmp = NULL;
4857                         rxr->lmp = NULL;
4858                 }
4859
4860 next_desc:
4861                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4862                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4863
4864                 /* Advance our pointers to the next descriptor. */
4865                 if (++i == adapter->num_rx_desc)
4866                         i = 0;
4867                 /*
4868                 ** Send to the stack or LRO
4869                 */
4870                 if (sendmp != NULL) {
4871                         rxr->next_to_check = i;
4872                         igb_rx_input(rxr, ifp, sendmp, ptype);
4873                         i = rxr->next_to_check;
4874                         rxdone++;
4875                 }
4876
4877                 /* Every 8 descriptors we go to refresh mbufs */
4878                 if (processed == 8) {
4879                         igb_refresh_mbufs(rxr, i);
4880                         processed = 0;
4881                 }
4882         }
4883
4884         /* Catch any remainders */
4885         if (igb_rx_unrefreshed(rxr))
4886                 igb_refresh_mbufs(rxr, i);
4887
4888         rxr->next_to_check = i;
4889
4890         /*
4891          * Flush any outstanding LRO work
4892          */
4893         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4894                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4895                 tcp_lro_flush(lro, queued);
4896         }
4897
4898         if (done != NULL)
4899                 *done = rxdone;
4900
4901         IGB_RX_UNLOCK(rxr);
4902         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4903 }
4904
4905 /*********************************************************************
4906  *
4907  *  Verify that the hardware indicated that the checksum is valid.
4908  *  Inform the stack about the checksum status so that the stack
4909  *  doesn't spend time verifying the checksum.
4910  *
4911  *********************************************************************/
4912 static void
4913 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4914 {
4915         u16 status = (u16)staterr;
4916         u8  errors = (u8) (staterr >> 24);
4917         int sctp;
4918
4919         /* Ignore Checksum bit is set */
4920         if (status & E1000_RXD_STAT_IXSM) {
4921                 mp->m_pkthdr.csum_flags = 0;
4922                 return;
4923         }
4924
4925         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4926             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4927                 sctp = 1;
4928         else
4929                 sctp = 0;
4930         if (status & E1000_RXD_STAT_IPCS) {
4931                 /* Did it pass? */
4932                 if (!(errors & E1000_RXD_ERR_IPE)) {
4933                         /* IP Checksum Good */
4934                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4935                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4936                 } else
4937                         mp->m_pkthdr.csum_flags = 0;
4938         }
4939
4940         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4941                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4942 #if __FreeBSD_version >= 800000
4943                 if (sctp) /* reassign */
4944                         type = CSUM_SCTP_VALID;
4945 #endif
4946                 /* Did it pass? */
4947                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4948                         mp->m_pkthdr.csum_flags |= type;
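                             /* csum_data 0xffff with CSUM_PSEUDO_HDR tells the */
                             /* stack the full checksum was verified by hardware */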
4949                         if (sctp == 0)
4950                                 mp->m_pkthdr.csum_data = htons(0xffff);
4951                 }
4952         }
4953         return;
4954 }
4955
4956 /*
4957  * This routine is run via a vlan
4958  * config EVENT
4959  */
4960 static void
4961 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4962 {
4963         struct adapter  *adapter = ifp->if_softc;
4964         u32             index, bit;
4965
4966         if (ifp->if_softc != arg)       /* Not our event */
4967                 return;
4968
4969         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4970                 return;
4971
4972         IGB_CORE_LOCK(adapter);
4973         index = (vtag >> 5) & 0x7F;
4974         bit = vtag & 0x1F;
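             /* e.g. vtag 100 -> word 3, bit 4 of the 128-word VFTA bitmap */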
4975         adapter->shadow_vfta[index] |= (1 << bit);
4976         ++adapter->num_vlans;
4977         /* Change hw filter setting */
4978         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4979                 igb_setup_vlan_hw_support(adapter);
4980         IGB_CORE_UNLOCK(adapter);
4981 }
4982
4983 /*
4984  * This routine is run via a vlan
4985  * unconfig EVENT
4986  */
4987 static void
4988 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4989 {
4990         struct adapter  *adapter = ifp->if_softc;
4991         u32             index, bit;
4992
4993         if (ifp->if_softc != arg)
4994                 return;
4995
4996         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4997                 return;
4998
4999         IGB_CORE_LOCK(adapter);
5000         index = (vtag >> 5) & 0x7F;
5001         bit = vtag & 0x1F;
5002         adapter->shadow_vfta[index] &= ~(1 << bit);
5003         --adapter->num_vlans;
5004         /* Change hw filter setting */
5005         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5006                 igb_setup_vlan_hw_support(adapter);
5007         IGB_CORE_UNLOCK(adapter);
5008 }
5009
5010 static void
5011 igb_setup_vlan_hw_support(struct adapter *adapter)
5012 {
5013         struct e1000_hw *hw = &adapter->hw;
5014         struct ifnet    *ifp = adapter->ifp;
5015         u32             reg;
5016
5017         if (adapter->vf_ifp) {
5018                 e1000_rlpml_set_vf(hw,
5019                     adapter->max_frame_size + VLAN_TAG_SIZE);
5020                 return;
5021         }
5022
5023         reg = E1000_READ_REG(hw, E1000_CTRL);
5024         reg |= E1000_CTRL_VME;
5025         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5026
5027         /* Enable the Filter Table */
5028         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5029                 reg = E1000_READ_REG(hw, E1000_RCTL);
5030                 reg &= ~E1000_RCTL_CFIEN;
5031                 reg |= E1000_RCTL_VFE;
5032                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5033         }
5034
5035         /* Update the frame size */
5036         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5037             adapter->max_frame_size + VLAN_TAG_SIZE);
5038
5039         /* Don't bother with table if no vlans */
5040         if ((adapter->num_vlans == 0) ||
5041             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5042                 return;
5043         /*
5044         ** A soft reset zeroes out the VFTA, so repopulate it now.
5045         ** (The VF case returned early above, so only the PF path runs.)
5046         */
5047         for (int i = 0; i < IGB_VFTA_SIZE; i++)
5048                 if (adapter->shadow_vfta[i] != 0) {
5049                         if (adapter->vf_ifp)
5050                                 e1000_vfta_set_vf(hw,
5051                                     adapter->shadow_vfta[i], TRUE);
5052                         else
5053                                 e1000_write_vfta(hw,
5054                                     i, adapter->shadow_vfta[i]);
5055                 }
5056 }
5057
5058 static void
5059 igb_enable_intr(struct adapter *adapter)
5060 {
5061         /* With RSS set up what to auto clear */
5062         if (adapter->msix_mem) {
5063                 u32 mask = (adapter->que_mask | adapter->link_mask);
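                     /* EIAC auto-clears and EIAM auto-masks these vectors */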
5064                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5065                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5066                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5067                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5068                     E1000_IMS_LSC);
5069         } else {
5070                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5071                     IMS_ENABLE_MASK);
5072         }
5073         E1000_WRITE_FLUSH(&adapter->hw);
5074
5075         return;
5076 }
5077
5078 static void
5079 igb_disable_intr(struct adapter *adapter)
5080 {
5081         if (adapter->msix_mem) {
5082                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5083                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5084         } 
5085         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5086         E1000_WRITE_FLUSH(&adapter->hw);
5087         return;
5088 }
5089
5090 /*
5091  * Something of a misnomer: what this really means is
5092  * enabling OS management of the system, i.e. disabling
5093  * the special hardware management features.
5094  */
5095 static void
5096 igb_init_manageability(struct adapter *adapter)
5097 {
5098         if (adapter->has_manage) {
5099                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5100                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5101
5102                 /* disable hardware interception of ARP */
5103                 manc &= ~(E1000_MANC_ARP_EN);
5104
5105                 /* enable receiving management packets to the host */
5106                 manc |= E1000_MANC_EN_MNG2HOST;
5107                 manc2h |= 1 << 5;  /* Mng Port 623 */
5108                 manc2h |= 1 << 6;  /* Mng Port 664 */
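                     /* UDP 623/664 are the RMCP and secure RMCP (ASF) ports */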
5109                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5110                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5111         }
5112 }
5113
5114 /*
5115  * Give control back to hardware management
5116  * controller if there is one.
5117  */
5118 static void
5119 igb_release_manageability(struct adapter *adapter)
5120 {
5121         if (adapter->has_manage) {
5122                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5123
5124                 /* re-enable hardware interception of ARP */
5125                 manc |= E1000_MANC_ARP_EN;
5126                 manc &= ~E1000_MANC_EN_MNG2HOST;
5127
5128                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5129         }
5130 }
5131
5132 /*
5133  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5134  * For ASF and Pass Through versions of f/w this means that
5135  * the driver is loaded. 
5136  *
5137  */
5138 static void
5139 igb_get_hw_control(struct adapter *adapter)
5140 {
5141         u32 ctrl_ext;
5142
5143         if (adapter->vf_ifp)
5144                 return;
5145
5146         /* Let firmware know the driver has taken over */
5147         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5148         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5149             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5150 }
5151
5152 /*
5153  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5154  * For ASF and Pass Through versions of f/w this means that the
5155  * driver is no longer loaded.
5156  *
5157  */
5158 static void
5159 igb_release_hw_control(struct adapter *adapter)
5160 {
5161         u32 ctrl_ext;
5162
5163         if (adapter->vf_ifp)
5164                 return;
5165
5166         /* Let firmware take over control of h/w */
5167         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5168         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5169             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5170 }
5171
5172 static int
5173 igb_is_valid_ether_addr(uint8_t *addr)
5174 {
5175         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5176
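             /* Reject multicast (I/G bit set) and the all-zero address. */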
5177         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5178                 return (FALSE);
5179         }
5180
5181         return (TRUE);
5182 }
5183
5184
5185 /*
5186  * Enable PCI Wake On Lan capability
5187  */
5188 static void
5189 igb_enable_wakeup(device_t dev)
5190 {
5191         u16     cap, status;
5192         u8      id;
5193
5194         /* First find the capabilities pointer */
5195         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5196         /* Read the PM Capabilities */
5197         id = pci_read_config(dev, cap, 1);
5198         if (id != PCIY_PMG)     /* Something wrong */
5199                 return;
5200         /* OK, we have the power capabilities, so
5201            now get the status register */
5202         cap += PCIR_POWER_STATUS;
5203         status = pci_read_config(dev, cap, 2);
5204         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5205         pci_write_config(dev, cap, status, 2);
5206         return;
5207 }
5208
5209 static void
5210 igb_led_func(void *arg, int onoff)
5211 {
5212         struct adapter  *adapter = arg;
5213
5214         IGB_CORE_LOCK(adapter);
5215         if (onoff) {
5216                 e1000_setup_led(&adapter->hw);
5217                 e1000_led_on(&adapter->hw);
5218         } else {
5219                 e1000_led_off(&adapter->hw);
5220                 e1000_cleanup_led(&adapter->hw);
5221         }
5222         IGB_CORE_UNLOCK(adapter);
5223 }
5224
5225 /**********************************************************************
5226  *
5227  *  Update the board statistics counters.
5228  *
5229  **********************************************************************/
5230 static void
5231 igb_update_stats_counters(struct adapter *adapter)
5232 {
5233         struct ifnet            *ifp;
5234         struct e1000_hw         *hw = &adapter->hw;
5235         struct e1000_hw_stats   *stats;
5236
5237         /* 
5238         ** The virtual function adapter has only a
5239         ** small controlled set of stats, so do only
5240         ** those and return.
5241         */
5242         if (adapter->vf_ifp) {
5243                 igb_update_vf_stats_counters(adapter);
5244                 return;
5245         }
5246
5247         stats = (struct e1000_hw_stats  *)adapter->stats;
5248
5249         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5250            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5251                 stats->symerrs +=
5252                     E1000_READ_REG(hw, E1000_SYMERRS);
5253                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5254         }
5255
5256         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5257         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5258         stats->scc += E1000_READ_REG(hw, E1000_SCC);
5259         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5260
5261         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5262         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5263         stats->colc += E1000_READ_REG(hw, E1000_COLC);
5264         stats->dc += E1000_READ_REG(hw, E1000_DC);
5265         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5266         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5267         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5268         /*
5269         ** For watchdog management we need to know if we have been
5270         ** paused during the last interval, so capture that here.
5271         */ 
5272         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5273         stats->xoffrxc += adapter->pause_frames;
5274         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5275         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5276         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5277         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5278         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5279         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5280         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5281         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5282         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5283         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5284         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5285         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5286
5287         /* For the 64-bit byte counters the low dword must be read first. */
5288         /* Both registers clear on the read of the high dword */
5289
5290         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5291             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5292         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5293             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5294
5295         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5296         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5297         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5298         stats->roc += E1000_READ_REG(hw, E1000_ROC);
5299         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5300
5301         stats->tor += E1000_READ_REG(hw, E1000_TORH);
5302         stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5303
5304         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5305         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5306         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5307         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5308         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5309         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5310         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5311         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5312         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5313         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5314
5315         /* Interrupt Counts */
5316
5317         stats->iac += E1000_READ_REG(hw, E1000_IAC);
5318         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5319         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5320         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5321         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5322         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5323         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5324         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5325         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5326
5327         /* Host to Card Statistics */
5328
5329         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5330         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5331         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5332         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5333         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5334         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5335         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5336         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5337             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5338         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5339             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5340         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5341         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5342         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5343
5344         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5345         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5346         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5347         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5348         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5349         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5350
5351         ifp = adapter->ifp;
5352         ifp->if_collisions = stats->colc;
5353
5354         /* Rx Errors */
5355         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5356             stats->crcerrs + stats->algnerrc +
5357             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5358
5359         /* Tx Errors */
5360         ifp->if_oerrors = stats->ecol +
5361             stats->latecol + adapter->watchdog_events;
5362
5363         /* Driver specific counters */
5364         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5365         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5366         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5367         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5368         adapter->packet_buf_alloc_tx =
5369             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5370         adapter->packet_buf_alloc_rx =
5371             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5372 }
5373
5374
5375 /**********************************************************************
5376  *
5377  *  Initialize the VF board statistics counters.
5378  *
5379  **********************************************************************/
5380 static void
5381 igb_vf_init_stats(struct adapter *adapter)
5382 {
5383         struct e1000_hw *hw = &adapter->hw;
5384         struct e1000_vf_stats   *stats;
5385
5386         stats = (struct e1000_vf_stats  *)adapter->stats;
5387         if (stats == NULL)
5388                 return;
5389         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5390         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5391         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5392         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5393         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5394 }
5395  
5396 /**********************************************************************
5397  *
5398  *  Update the VF board statistics counters.
5399  *
5400  **********************************************************************/
5401 static void
5402 igb_update_vf_stats_counters(struct adapter *adapter)
5403 {
5404         struct e1000_hw *hw = &adapter->hw;
5405         struct e1000_vf_stats   *stats;
5406
5407         if (adapter->link_speed == 0)
5408                 return;
5409
5410         stats = (struct e1000_vf_stats  *)adapter->stats;
5411
5412         UPDATE_VF_REG(E1000_VFGPRC,
5413             stats->last_gprc, stats->gprc);
5414         UPDATE_VF_REG(E1000_VFGORC,
5415             stats->last_gorc, stats->gorc);
5416         UPDATE_VF_REG(E1000_VFGPTC,
5417             stats->last_gptc, stats->gptc);
5418         UPDATE_VF_REG(E1000_VFGOTC,
5419             stats->last_gotc, stats->gotc);
5420         UPDATE_VF_REG(E1000_VFMPRC,
5421             stats->last_mprc, stats->mprc);
5422 }
5423
5424 /* Export a single 32-bit register via a read-only sysctl. */
5425 static int
5426 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5427 {
5428         struct adapter *adapter;
5429         u_int val;
5430
5431         adapter = oidp->oid_arg1;
5432         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5433         return (sysctl_handle_int(oidp, &val, 0, req));
5434 }
5435
5436 /*
5437 **  Tunable interrupt rate handler
5438 */
5439 static int
5440 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5441 {
5442         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5443         int                     error;
5444         u32                     reg, usec, rate;
5445                         
5446         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5447         usec = ((reg & 0x7FFC) >> 2);
5448         if (usec > 0)
5449                 rate = 1000000 / usec;
5450         else
5451                 rate = 0;
5452         error = sysctl_handle_int(oidp, &rate, 0, req);
5453         if (error || !req->newptr)
5454                 return (error);
5455         return (0);
5456 }
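
     /*
     ** Usage sketch (assuming unit 0, queue 0):
     **
     **   sysctl dev.igb.0.queue0.interrupt_rate
     **
     ** The value reported is 1000000 / (EITR interval in usecs).
     ** The handler above accepts a write but never reprograms
     ** EITR, so the rate is effectively read-only.
     */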
5457
5458 /*
5459  * Add sysctl variables, one per statistic, to the system.
5460  */
5461 static void
5462 igb_add_hw_stats(struct adapter *adapter)
5463 {
5464         device_t dev = adapter->dev;
5465
5466         struct tx_ring *txr = adapter->tx_rings;
5467         struct rx_ring *rxr = adapter->rx_rings;
5468
5469         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5470         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5471         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5472         struct e1000_hw_stats *stats = adapter->stats;
5473
5474         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5475         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5476
5477 #define QUEUE_NAME_LEN 32
5478         char namebuf[QUEUE_NAME_LEN];
5479
5480         /* Driver Statistics */
5481         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5482                         CTLFLAG_RD, &adapter->link_irq, 0,
5483                         "Link MSIX IRQ Handled");
5484         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5485                         CTLFLAG_RD, &adapter->dropped_pkts,
5486                         "Driver dropped packets");
5487         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5488                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5489                         "Driver tx dma failure in xmit");
5490         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5491                         CTLFLAG_RD, &adapter->rx_overruns,
5492                         "RX overruns");
5493         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5494                         CTLFLAG_RD, &adapter->watchdog_events,
5495                         "Watchdog timeouts");
5496
5497         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5498                         CTLFLAG_RD, &adapter->device_control,
5499                         "Device Control Register");
5500         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5501                         CTLFLAG_RD, &adapter->rx_control,
5502                         "Receiver Control Register");
5503         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5504                         CTLFLAG_RD, &adapter->int_mask,
5505                         "Interrupt Mask");
5506         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5507                         CTLFLAG_RD, &adapter->eint_mask,
5508                         "Extended Interrupt Mask");
5509         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5510                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5511                         "Transmit Buffer Packet Allocation");
5512         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5513                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5514                         "Receive Buffer Packet Allocation");
5515         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5516                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5517                         "Flow Control High Watermark");
5518         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5519                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5520                         "Flow Control Low Watermark");
5521
5522         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5523                 struct lro_ctrl *lro = &rxr->lro;
5524
5525                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5526                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5527                                             CTLFLAG_RD, NULL, "Queue Name");
5528                 queue_list = SYSCTL_CHILDREN(queue_node);
5529
5530                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5531                                 CTLFLAG_RD, &adapter->queues[i],
5532                                 sizeof(&adapter->queues[i]),
5533                                 igb_sysctl_interrupt_rate_handler,
5534                                 "IU", "Interrupt Rate");
5535
5536                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5537                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5538                                 igb_sysctl_reg_handler, "IU",
5539                                 "Transmit Descriptor Head");
5540                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5541                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5542                                 igb_sysctl_reg_handler, "IU",
5543                                 "Transmit Descriptor Tail");
5544                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5545                                 CTLFLAG_RD, &txr->no_desc_avail,
5546                                 "Queue No Descriptor Available");
5547                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5548                                 CTLFLAG_RD, &txr->tx_packets,
5549                                 "Queue Packets Transmitted");
5550
5551                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5552                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5553                                 igb_sysctl_reg_handler, "IU",
5554                                 "Receive Descriptor Head");
5555                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5556                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5557                                 igb_sysctl_reg_handler, "IU",
5558                                 "Receive Descriptor Tail");
5559                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5560                                 CTLFLAG_RD, &rxr->rx_packets,
5561                                 "Queue Packets Received");
5562                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5563                                 CTLFLAG_RD, &rxr->rx_bytes,
5564                                 "Queue Bytes Received");
5565                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5566                                 CTLFLAG_RD, &lro->lro_queued, 0,
5567                                 "LRO Queued");
5568                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5569                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5570                                 "LRO Flushed");
5571         }
5572
5573         /* MAC stats get their own sub node */
5574
5575         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5576                                     CTLFLAG_RD, NULL, "MAC Statistics");
5577         stat_list = SYSCTL_CHILDREN(stat_node);
5578
5579         /*
5580         ** A VF adapter exposes only a small set of stats
5581         ** since it's not managing the metal, so to speak.
5582         */
5583         if (adapter->vf_ifp) {
5584                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5585                                 CTLFLAG_RD, &stats->gprc,
5586                                 "Good Packets Received");
5587                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5588                                 CTLFLAG_RD, &stats->gptc,
5589                                 "Good Packets Transmitted");
5590                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5591                                 CTLFLAG_RD, &stats->gorc,
5592                                 "Good Octets Received");
5593                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5594                                 CTLFLAG_RD, &stats->gotc,
5595                                 "Good Octets Transmitted");
5596                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5597                                 CTLFLAG_RD, &stats->mprc,
5598                                 "Multicast Packets Received");
5599                 return;
5600         }
5601
5602         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5603                         CTLFLAG_RD, &stats->ecol,
5604                         "Excessive collisions");
5605         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5606                         CTLFLAG_RD, &stats->scc,
5607                         "Single collisions");
5608         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5609                         CTLFLAG_RD, &stats->mcc,
5610                         "Multiple collisions");
5611         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5612                         CTLFLAG_RD, &stats->latecol,
5613                         "Late collisions");
5614         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5615                         CTLFLAG_RD, &stats->colc,
5616                         "Collision Count");
5617         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5618                         CTLFLAG_RD, &stats->symerrs,
5619                         "Symbol Errors");
5620         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5621                         CTLFLAG_RD, &stats->sec,
5622                         "Sequence Errors");
5623         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5624                         CTLFLAG_RD, &stats->dc,
5625                         "Defer Count");
5626         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5627                         CTLFLAG_RD, &stats->mpc,
5628                         "Missed Packets");
5629         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5630                         CTLFLAG_RD, &stats->rnbc,
5631                         "Receive No Buffers");
5632         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5633                         CTLFLAG_RD, &stats->ruc,
5634                         "Receive Undersize");
5635         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5636                         CTLFLAG_RD, &stats->rfc,
5637                         "Fragmented Packets Received ");
5638         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5639                         CTLFLAG_RD, &stats->roc,
5640                         "Oversized Packets Received");
5641         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5642                         CTLFLAG_RD, &stats->rjc,
5643                         "Recevied Jabber");
5644         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5645                         CTLFLAG_RD, &stats->rxerrc,
5646                         "Receive Errors");
5647         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5648                         CTLFLAG_RD, &stats->crcerrs,
5649                         "CRC errors");
5650         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5651                         CTLFLAG_RD, &stats->algnerrc,
5652                         "Alignment Errors");
5653         /* On 82575 these are collision counts */
5654         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5655                         CTLFLAG_RD, &stats->cexterr,
5656                         "Collision/Carrier extension errors");
5657         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5658                         CTLFLAG_RD, &stats->xonrxc,
5659                         "XON Received");
5660         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5661                         CTLFLAG_RD, &stats->xontxc,
5662                         "XON Transmitted");
5663         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5664                         CTLFLAG_RD, &stats->xoffrxc,
5665                         "XOFF Received");
5666         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5667                         CTLFLAG_RD, &stats->xofftxc,
5668                         "XOFF Transmitted");
5669         /* Packet Reception Stats */
5670         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5671                         CTLFLAG_RD, &stats->tpr,
5672                         "Total Packets Received ");
5673         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5674                         CTLFLAG_RD, &stats->gprc,
5675                         "Good Packets Received");
5676         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5677                         CTLFLAG_RD, &stats->bprc,
5678                         "Broadcast Packets Received");
5679         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5680                         CTLFLAG_RD, &stats->mprc,
5681                         "Multicast Packets Received");
5682         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5683                         CTLFLAG_RD, &stats->prc64,
5684                         "64 byte frames received ");
5685         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5686                         CTLFLAG_RD, &stats->prc127,
5687                         "65-127 byte frames received");
5688         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5689                         CTLFLAG_RD, &stats->prc255,
5690                         "128-255 byte frames received");
5691         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5692                         CTLFLAG_RD, &stats->prc511,
5693                         "256-511 byte frames received");
5694         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5695                         CTLFLAG_RD, &stats->prc1023,
5696                         "512-1023 byte frames received");
5697         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5698                         CTLFLAG_RD, &stats->prc1522,
5699                         "1023-1522 byte frames received");
5700         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5701                         CTLFLAG_RD, &stats->gorc, 
5702                         "Good Octets Received"); 
5703
5704         /* Packet Transmission Stats */
5705         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5706                         CTLFLAG_RD, &stats->gotc, 
5707                         "Good Octets Transmitted"); 
5708         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5709                         CTLFLAG_RD, &stats->tpt,
5710                         "Total Packets Transmitted");
5711         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5712                         CTLFLAG_RD, &stats->gptc,
5713                         "Good Packets Transmitted");
5714         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5715                         CTLFLAG_RD, &stats->bptc,
5716                         "Broadcast Packets Transmitted");
5717         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5718                         CTLFLAG_RD, &stats->mptc,
5719                         "Multicast Packets Transmitted");
5720         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5721                         CTLFLAG_RD, &stats->ptc64,
5722                         "64 byte frames transmitted ");
5723         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5724                         CTLFLAG_RD, &stats->ptc127,
5725                         "65-127 byte frames transmitted");
5726         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5727                         CTLFLAG_RD, &stats->ptc255,
5728                         "128-255 byte frames transmitted");
5729         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5730                         CTLFLAG_RD, &stats->ptc511,
5731                         "256-511 byte frames transmitted");
5732         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5733                         CTLFLAG_RD, &stats->ptc1023,
5734                         "512-1023 byte frames transmitted");
5735         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5736                         CTLFLAG_RD, &stats->ptc1522,
5737                         "1024-1522 byte frames transmitted");
5738         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5739                         CTLFLAG_RD, &stats->tsctc,
5740                         "TSO Contexts Transmitted");
5741         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5742                         CTLFLAG_RD, &stats->tsctfc,
5743                         "TSO Contexts Failed");
5744
5745
5746         /* Interrupt Stats */
5747
5748         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5749                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5750         int_list = SYSCTL_CHILDREN(int_node);
5751
5752         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5753                         CTLFLAG_RD, &stats->iac,
5754                         "Interrupt Assertion Count");
5755
5756         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5757                         CTLFLAG_RD, &stats->icrxptc,
5758                         "Interrupt Cause Rx Pkt Timer Expire Count");
5759
5760         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5761                         CTLFLAG_RD, &stats->icrxatc,
5762                         "Interrupt Cause Rx Abs Timer Expire Count");
5763
5764         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5765                         CTLFLAG_RD, &stats->ictxptc,
5766                         "Interrupt Cause Tx Pkt Timer Expire Count");
5767
5768         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5769                         CTLFLAG_RD, &stats->ictxatc,
5770                         "Interrupt Cause Tx Abs Timer Expire Count");
5771
5772         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5773                         CTLFLAG_RD, &stats->ictxqec,
5774                         "Interrupt Cause Tx Queue Empty Count");
5775
5776         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5777                         CTLFLAG_RD, &stats->ictxqmtc,
5778                         "Interrupt Cause Tx Queue Min Thresh Count");
5779
5780         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5781                         CTLFLAG_RD, &stats->icrxdmtc,
5782                         "Interrupt Cause Rx Desc Min Thresh Count");
5783
5784         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5785                         CTLFLAG_RD, &stats->icrxoc,
5786                         "Interrupt Cause Receiver Overrun Count");
5787
5788         /* Host to Card Stats */
5789
5790         host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", 
5791                                     CTLFLAG_RD, NULL, 
5792                                     "Host to Card Statistics");
5793
5794         host_list = SYSCTL_CHILDREN(host_node);
5795
5796         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5797                         CTLFLAG_RD, &stats->cbtmpc,
5798                         "Circuit Breaker Tx Packet Count");
5799
5800         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5801                         CTLFLAG_RD, &stats->htdpmc,
5802                         "Host Transmit Discarded Packets");
5803
5804         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5805                         CTLFLAG_RD, &stats->rpthc,
5806                         "Rx Packets To Host");
5807
5808         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5809                         CTLFLAG_RD, &stats->cbrmpc,
5810                         "Circuit Breaker Rx Packet Count");
5811
5812         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5813                         CTLFLAG_RD, &stats->cbrdpc,
5814                         "Circuit Breaker Rx Dropped Count");
5815
5816         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5817                         CTLFLAG_RD, &stats->hgptc,
5818                         "Host Good Packets Tx Count");
5819
5820         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5821                         CTLFLAG_RD, &stats->htcbdpc,
5822                         "Host Tx Circuit Breaker Dropped Count");
5823
5824         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5825                         CTLFLAG_RD, &stats->hgorc,
5826                         "Host Good Octets Received Count");
5827
5828         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5829                         CTLFLAG_RD, &stats->hgotc,
5830                         "Host Good Octets Transmit Count");
5831
5832         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5833                         CTLFLAG_RD, &stats->lenerrs,
5834                         "Length Errors");
5835
5836         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5837                         CTLFLAG_RD, &stats->scvpc,
5838                         "SerDes/SGMII Code Violation Pkt Count");
5839
5840         SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5841                         CTLFLAG_RD, &stats->hrmpc,
5842                         "Header Redirection Missed Packet Count");
5843 }
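/*
 * All of the nodes above hang off the device's sysctl tree, so from
 * userland (unit number is an example) they read as:
 *	sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *	sysctl dev.igb.0.queue0.interrupt_rate
 */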
5844
5845
5846 /**********************************************************************
5847  *
5848  *  This routine provides a way to dump out the adapter eeprom,
5849  *  often a useful debug/service tool. Only the first 32 words are
5850  *  dumped; the data that matters lives within that range.
5851  *
5852  **********************************************************************/
5853 static int
5854 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5855 {
5856         struct adapter *adapter;
5857         int error;
5858         int result;
5859
5860         result = -1;
5861         error = sysctl_handle_int(oidp, &result, 0, req);
5862
5863         if (error || !req->newptr)
5864                 return (error);
5865
5866         /*
5867          * This value will cause a hex dump of the
5868          * first 32 16-bit words of the EEPROM to
5869          * the screen.
5870          */
5871         if (result == 1) {
5872                 adapter = (struct adapter *)arg1;
5873                 igb_print_nvm_info(adapter);
5874         }
5875
5876         return (error);
5877 }
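/*
 * Example trigger, assuming the attach code registers this handler
 * under an oid named "nvm" (unit number illustrative):
 *	sysctl dev.igb.0.nvm=1
 * writes 1, which dumps the EEPROM contents to the console.
 */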
5878
5879 static void
5880 igb_print_nvm_info(struct adapter *adapter)
5881 {
5882         u16     eeprom_data;
5883         int     i, j, row = 0;
5884
5885         /* It's a bit crude, but it gets the job done */
5886         printf("\nInterface EEPROM Dump:\n");
5887         printf("Offset\n0x0000  ");
5888         for (i = 0, j = 0; i < 32; i++, j++) {
5889                 if (j == 8) { /* Make the offset block */
5890                         j = 0; ++row;
5891                         printf("\n0x00%x0  ",row);
5892                 }
5893                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5894                 printf("%04x ", eeprom_data);
5895         }
5896         printf("\n");
5897 }
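/*
 * The dump is four rows of eight words each; with made-up data the
 * output looks like:
 * Offset
 * 0x0000  2c41 5a09 1234 ...
 * 0x0010  ...
 */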
5898
5899 static void
5900 igb_set_sysctl_value(struct adapter *adapter, const char *name,
5901         const char *description, int *limit, int value)
5902 {
5903         *limit = value;
5904         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5905             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5906             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5907 }
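/*
 * Typical attach-time use; the oid name, description, and default
 * below are illustrative, not the driver's actual registration:
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max rx packets to process per call",
 *	    &adapter->rx_process_limit, 100);
 */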
5908
5909 /*
5910 ** Set flow control using sysctl:
5911 ** Flow control values:
5912 **      0 - off
5913 **      1 - rx pause
5914 **      2 - tx pause
5915 **      3 - full
5916 */
5917 static int
5918 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5919 {
5920         struct adapter  *adapter = (struct adapter *) arg1;
5921         int             error;
5922         int             input = adapter->fc; /* report the current setting */
5923
5924         error = sysctl_handle_int(oidp, &input, 0, req);
5925
5926         if ((error) || (req->newptr == NULL))
5927                 return (error);
5928
5929         switch (input) {
5930                 case e1000_fc_rx_pause:
5931                 case e1000_fc_tx_pause:
5932                 case e1000_fc_full:
5933                 case e1000_fc_none:
5934                         adapter->hw.fc.requested_mode = input;
5935                         adapter->fc = input;
5936                         break;
5937                 default:
5938                         /* Do nothing */
5939                         return (error);
5940         }
5941
5942         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5943         e1000_force_mac_fc(&adapter->hw);
5944         return (error);
5945 }
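/*
 * Example, assuming the handler is registered under an oid named "fc"
 * (unit number illustrative):
 *	sysctl dev.igb.0.fc=3
 * requests full flow control; e1000_force_mac_fc() then pushes the
 * mode to the MAC.
 */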
5946
5947 /*
5948 ** Manage DMA Coalescing:
5949 ** Control values:
5950 **      0/1 - off/on (1 selects the default timer of 1000)
5951 **      Legal timer values are:
5952 **      250, 500, and 1000-10000 in increments of 1000
5953 */
5954 static int
5955 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5956 {
5957         struct adapter *adapter = (struct adapter *) arg1;
5958         int             error;
5959
5960         error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5961
5962         if ((error) || (req->newptr == NULL))
5963                 return (error);
5964
5965         switch (adapter->dmac) {
5966                 case 0:
5967                         /* Disabling */
5968                         break;
5969                 case 1: /* Just enable and use default */
5970                         adapter->dmac = 1000;
5971                         break;
5972                 case 250:
5973                 case 500:
5974                 case 1000:
5975                 case 2000:
5976                 case 3000:
5977                 case 4000:
5978                 case 5000:
5979                 case 6000:
5980                 case 7000:
5981                 case 8000:
5982                 case 9000:
5983                 case 10000:
5984                         /* Legal values - allow */
5985                         break;
5986                 default:
5987                         /* Illegal value, disable the feature */
5988                         adapter->dmac = 0;
5989                         return (error);
5990         }
5991         /* Reinit the interface */
5992         igb_init(adapter);
5993         return (error);
5994 }
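/*
 * Example, assuming an oid named "dmac" (unit number illustrative):
 *	sysctl dev.igb.0.dmac=1000
 * enables DMA coalescing with a timer value of 1000 and reinitializes
 * the interface via igb_init().
 */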
5995
5996 /*
5997 ** Manage Energy Efficient Ethernet:
5998 ** Control values:
5999 **     0 - EEE enabled, 1 - EEE disabled (the value tracks eee_disable)
6000 */
6001 static int
6002 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6003 {
6004         struct adapter  *adapter = (struct adapter *) arg1;
6005         int             error, value;
6006
6007         value = adapter->hw.dev_spec._82575.eee_disable;
6008         error = sysctl_handle_int(oidp, &value, 0, req);
6009         if (error || req->newptr == NULL)
6010                 return (error);
6011         IGB_CORE_LOCK(adapter);
6012         adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6013         igb_init_locked(adapter);
6014         IGB_CORE_UNLOCK(adapter);
6015         return (0);
6016 }
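/*
 * Example, assuming an oid named "eee_disabled" (unit illustrative):
 *	sysctl dev.igb.0.eee_disabled=1
 * disables EEE; the interface is reinitialized under the core lock
 * for the change to take effect.
 */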