1 /******************************************************************************
2
3   Copyright (c) 2001-2013, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #include "opt_inet.h"
37 #include "opt_inet6.h"
38 #include "opt_rss.h"
39 #include "ixgbe.h"
40
41 #ifdef  RSS
42 #include <netinet/in_rss.h>
43 #endif
44
45 /*********************************************************************
46  *  Set this to one to display debug statistics
47  *********************************************************************/
48 int             ixgbe_display_debug_stats = 0;
49
50 /*********************************************************************
51  *  Driver version
52  *********************************************************************/
53 char ixgbe_driver_version[] = "2.5.15";
54
55 /*********************************************************************
56  *  PCI Device ID Table
57  *
58  *  Used by probe to select devices to load on
59  *  Last field stores an index into ixgbe_strings
60  *  Last entry must be all 0s
61  *
62  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
63  *********************************************************************/
64
65 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
66 {
67         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
68         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
69         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
70         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
71         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
72         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
73         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
74         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
75         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
76         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
77         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
78         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
79         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
80         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
81         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
82         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
83         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
84         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
85         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
86         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
87         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
88         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
89         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
90         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
91         /* required last entry */
92         {0, 0, 0, 0, 0}
93 };
94
95 /*********************************************************************
96  *  Table of branding strings
97  *********************************************************************/
98
99 static char    *ixgbe_strings[] = {
100         "Intel(R) PRO/10GbE PCI-Express Network Driver"
101 };
102
103 /*********************************************************************
104  *  Function prototypes
105  *********************************************************************/
106 static int      ixgbe_probe(device_t);
107 static int      ixgbe_attach(device_t);
108 static int      ixgbe_detach(device_t);
109 static int      ixgbe_shutdown(device_t);
110 #ifdef IXGBE_LEGACY_TX
111 static void     ixgbe_start(struct ifnet *);
112 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
113 #else /* ! IXGBE_LEGACY_TX */
114 static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
115 static int      ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
116 static void     ixgbe_qflush(struct ifnet *);
117 static void     ixgbe_deferred_mq_start(void *, int);
118 #endif /* IXGBE_LEGACY_TX */
119 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
120 static void     ixgbe_init(void *);
121 static void     ixgbe_init_locked(struct adapter *);
122 static void     ixgbe_stop(void *);
123 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
124 static int      ixgbe_media_change(struct ifnet *);
125 static void     ixgbe_identify_hardware(struct adapter *);
126 static int      ixgbe_allocate_pci_resources(struct adapter *);
127 static void     ixgbe_get_slot_info(struct ixgbe_hw *);
128 static int      ixgbe_allocate_msix(struct adapter *);
129 static int      ixgbe_allocate_legacy(struct adapter *);
130 static int      ixgbe_allocate_queues(struct adapter *);
131 static int      ixgbe_setup_msix(struct adapter *);
132 static void     ixgbe_free_pci_resources(struct adapter *);
133 static void     ixgbe_local_timer(void *);
134 static int      ixgbe_setup_interface(device_t, struct adapter *);
135 static void     ixgbe_config_link(struct adapter *);
136
137 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
138 static int      ixgbe_setup_transmit_structures(struct adapter *);
139 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
140 static void     ixgbe_initialize_transmit_units(struct adapter *);
141 static void     ixgbe_free_transmit_structures(struct adapter *);
142 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
143
144 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
145 static int      ixgbe_setup_receive_structures(struct adapter *);
146 static int      ixgbe_setup_receive_ring(struct rx_ring *);
147 static void     ixgbe_initialize_receive_units(struct adapter *);
148 static void     ixgbe_free_receive_structures(struct adapter *);
149 static void     ixgbe_free_receive_buffers(struct rx_ring *);
150 static void     ixgbe_setup_hw_rsc(struct rx_ring *);
151
152 static void     ixgbe_enable_intr(struct adapter *);
153 static void     ixgbe_disable_intr(struct adapter *);
154 static void     ixgbe_update_stats_counters(struct adapter *);
155 static void     ixgbe_txeof(struct tx_ring *);
156 static bool     ixgbe_rxeof(struct ix_queue *);
157 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
158 static void     ixgbe_set_promisc(struct adapter *);
159 static void     ixgbe_set_multi(struct adapter *);
160 static void     ixgbe_update_link_status(struct adapter *);
161 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
162 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
163 static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
164 static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
165 static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
166 static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
167                     struct ixgbe_dma_alloc *, int);
168 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
169 static int      ixgbe_tx_ctx_setup(struct tx_ring *,
170                     struct mbuf *, u32 *, u32 *);
171 static int      ixgbe_tso_setup(struct tx_ring *,
172                     struct mbuf *, u32 *, u32 *);
173 static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
174 static void     ixgbe_configure_ivars(struct adapter *);
175 static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
176
177 static void     ixgbe_setup_vlan_hw_support(struct adapter *);
178 static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
179 static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);
180
181 static void     ixgbe_add_hw_stats(struct adapter *adapter);
182
183 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
184 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
185                     struct mbuf *, u32);
186
187 static void     ixgbe_enable_rx_drop(struct adapter *);
188 static void     ixgbe_disable_rx_drop(struct adapter *);
189
190 /* Support for pluggable optic modules */
191 static bool     ixgbe_sfp_probe(struct adapter *);
192 static void     ixgbe_setup_optics(struct adapter *);
193
194 /* Legacy (single vector) interrupt handler */
195 static void     ixgbe_legacy_irq(void *);
196
197 /* The MSI/X Interrupt handlers */
198 static void     ixgbe_msix_que(void *);
199 static void     ixgbe_msix_link(void *);
200
201 /* Deferred interrupt tasklets */
202 static void     ixgbe_handle_que(void *, int);
203 static void     ixgbe_handle_link(void *, int);
204 static void     ixgbe_handle_msf(void *, int);
205 static void     ixgbe_handle_mod(void *, int);
206
207 #ifdef IXGBE_FDIR
208 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
209 static void     ixgbe_reinit_fdir(void *, int);
210 #endif
211
212 /* Missing shared code prototype */
213 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
214
215 /*********************************************************************
216  *  FreeBSD Device Interface Entry Points
217  *********************************************************************/
218
219 static device_method_t ixgbe_methods[] = {
220         /* Device interface */
221         DEVMETHOD(device_probe, ixgbe_probe),
222         DEVMETHOD(device_attach, ixgbe_attach),
223         DEVMETHOD(device_detach, ixgbe_detach),
224         DEVMETHOD(device_shutdown, ixgbe_shutdown),
225         DEVMETHOD_END
226 };
227
228 static driver_t ixgbe_driver = {
229         "ix", ixgbe_methods, sizeof(struct adapter),
230 };
231
232 devclass_t ixgbe_devclass;
233 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
234
235 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
236 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
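/*
 * Usage sketch for the declarations above: the driver attaches on the
 * pci bus as "ix", so interfaces appear as ix0, ix1, ...  The module is
 * normally built as if_ixgbe.ko, so loading it at boot would typically be
 * done with a loader.conf(5) line such as:
 *
 *   if_ixgbe_load="YES"
 */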
237
238 /*
239 ** TUNEABLE PARAMETERS:
240 */
241
242 static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
243                    "IXGBE driver parameters");
244
245 /*
246 ** AIM: Adaptive Interrupt Moderation
247 ** which means that the interrupt rate
248 ** is varied over time based on the
249 ** traffic for that interrupt vector
250 */
251 static int ixgbe_enable_aim = TRUE;
252 SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0,
253     "Enable adaptive interrupt moderation");
254
255 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
256 SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
257     &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
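/*
 * A worked example of the default above: assuming IXGBE_LOW_LATENCY is
 * defined as 128 in ixgbe.h (its usual value), the initial ceiling is
 * 4000000 / 128 = 31250 interrupts per second per vector.
 */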
258
259 /* How many packets rxeof tries to clean at a time */
260 static int ixgbe_rx_process_limit = 256;
261 SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
262     &ixgbe_rx_process_limit, 0,
263     "Maximum number of received packets to process at a time, "
264     "-1 means unlimited");
265
266 /* How many packets txeof tries to clean at a time */
267 static int ixgbe_tx_process_limit = 256;
268 SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
269     &ixgbe_tx_process_limit, 0,
270     "Maximum number of sent packets to process at a time, "
271     "-1 means unlimited");
272
273 /*
274 ** Smart speed setting, default to on.
275 ** This only works as a compile option
276 ** right now, as it is set during attach;
277 ** set this to 'ixgbe_smart_speed_off' to
278 ** disable.
279 */
280 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
281
282 /*
283  * MSIX should be the default for best performance,
284  * but this allows it to be forced off for testing.
285  */
286 static int ixgbe_enable_msix = 1;
287 SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
288     "Enable MSI-X interrupts");
289
290 /*
291  * Number of queues: can be set to 0,
292  * in which case it autoconfigures based
293  * on the number of CPUs, with a max of 8.
294  * This can be overridden manually here.
295  */
296 static int ixgbe_num_queues = 0;
297 SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
298     "Number of queues to configure, 0 indicates autoconfigure");
299
300 /*
301 ** Number of TX descriptors per ring;
302 ** set higher than RX as this seems
303 ** the better-performing choice.
304 */
305 static int ixgbe_txd = PERFORM_TXD;
306 SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
307     "Number of transmit descriptors per queue");
308
309 /* Number of RX descriptors per ring */
310 static int ixgbe_rxd = PERFORM_RXD;
311 SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
312     "Number of receive descriptors per queue");
313
314 /*
315 ** Setting this to TRUE allows the use
316 ** of unsupported SFP+ modules; note that
317 ** in doing so you are on your own :)
318 */
319 static int allow_unsupported_sfp = FALSE;
320 TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);
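/*
 * Illustrative loader.conf(5) settings for the tunables declared above;
 * the values shown are examples only, not recommendations:
 *
 *   hw.ix.enable_msix="1"
 *   hw.ix.num_queues="4"
 *   hw.ix.max_interrupt_rate="31250"
 *   hw.ix.rx_process_limit="512"
 *   hw.ix.txd="2048"
 *   hw.ix.rxd="2048"
 *   hw.ix.unsupported_sfp="1"
 *
 * Tunables created with CTLFLAG_RWTUN (e.g. hw.ix.enable_aim) can also be
 * changed at runtime with sysctl(8).
 */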
321
322 /*
323 ** HW RSC control:
324 **  this feature only works with
325 **  IPv4, and only on 82599 and later.
326 **  It will also cause IP forwarding to
327 **  fail, and that can't be controlled by
328 **  the stack the way LRO can. For all these
329 **  reasons I've deemed it best to leave
330 **  this off and not bother with a tuneable
331 **  interface; enabling it requires
332 **  recompiling with this set to TRUE.
333 */
334 static bool ixgbe_rsc_enable = FALSE;
335
336 /* Keep a running tab on ports for a sanity check */
337 static int ixgbe_total_ports;
338
339 #ifdef IXGBE_FDIR
340 /*
341 ** For Flow Director: this is the
342 ** number of TX packets we sample
343 ** for the filter pool; this means
344 ** every 20th packet will be probed.
345 **
346 ** This feature can be disabled by 
347 ** setting this to 0.
348 */
349 static int atr_sample_rate = 20;
350 /* 
351 ** Flow Director actually 'steals'
352 ** part of the packet buffer as its
353 ** filter pool; this variable controls
354 ** how much it uses:
355 **  0 = 64K, 1 = 128K, 2 = 256K
356 */
357 static int fdir_pballoc = 1;
358 #endif
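/*
 * For instance, with the default fdir_pballoc of 1 the Flow Director
 * filter pool above is 128K, and the header memory value handed to
 * setup_rxpba() in ixgbe_init_locked() below works out to 32 << 1 = 64.
 */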
359
360 #ifdef DEV_NETMAP
361 /*
362  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
363  * be a reference on how to implement netmap support in a driver.
364  * Additional comments are in ixgbe_netmap.h .
365  *
366  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
367  * that extend the standard driver.
368  */
369 #include <dev/netmap/ixgbe_netmap.h>
370 #endif /* DEV_NETMAP */
371
372 /*********************************************************************
373  *  Device identification routine
374  *
375  *  ixgbe_probe determines if the driver should be loaded on
376  *  the adapter based on its PCI vendor/device ID.
377  *
378  *  return BUS_PROBE_DEFAULT on success, positive on failure
379  *********************************************************************/
380
381 static int
382 ixgbe_probe(device_t dev)
383 {
384         ixgbe_vendor_info_t *ent;
385
386         u16     pci_vendor_id = 0;
387         u16     pci_device_id = 0;
388         u16     pci_subvendor_id = 0;
389         u16     pci_subdevice_id = 0;
390         char    adapter_name[256];
391
392         INIT_DEBUGOUT("ixgbe_probe: begin");
393
394         pci_vendor_id = pci_get_vendor(dev);
395         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
396                 return (ENXIO);
397
398         pci_device_id = pci_get_device(dev);
399         pci_subvendor_id = pci_get_subvendor(dev);
400         pci_subdevice_id = pci_get_subdevice(dev);
401
402         ent = ixgbe_vendor_info_array;
403         while (ent->vendor_id != 0) {
404                 if ((pci_vendor_id == ent->vendor_id) &&
405                     (pci_device_id == ent->device_id) &&
406
407                     ((pci_subvendor_id == ent->subvendor_id) ||
408                      (ent->subvendor_id == 0)) &&
409
410                     ((pci_subdevice_id == ent->subdevice_id) ||
411                      (ent->subdevice_id == 0))) {
412                         sprintf(adapter_name, "%s, Version - %s",
413                                 ixgbe_strings[ent->index],
414                                 ixgbe_driver_version);
415                         device_set_desc_copy(dev, adapter_name);
416                         ++ixgbe_total_ports;
417                         return (BUS_PROBE_DEFAULT);
418                 }
419                 ent++;
420         }
421         return (ENXIO);
422 }
423
424 /*********************************************************************
425  *  Device initialization routine
426  *
427  *  The attach entry point is called when the driver is being loaded.
428  *  This routine identifies the type of hardware, allocates all resources
429  *  and initializes the hardware.
430  *
431  *  return 0 on success, positive on failure
432  *********************************************************************/
433
434 static int
435 ixgbe_attach(device_t dev)
436 {
437         struct adapter *adapter;
438         struct ixgbe_hw *hw;
439         int             error = 0;
440         u16             csum;
441         u32             ctrl_ext;
442
443         INIT_DEBUGOUT("ixgbe_attach: begin");
444
445         /* Allocate, clear, and link in our adapter structure */
446         adapter = device_get_softc(dev);
447         adapter->dev = adapter->osdep.dev = dev;
448         hw = &adapter->hw;
449
450         /* Core Lock Init*/
451         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
452
453         /* SYSCTL APIs */
454
455         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
456                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
457                         OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
458                         adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
459
460         SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
461                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
462                         OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
463                         &ixgbe_enable_aim, 1, "Interrupt Moderation");
464
465         /*
466         ** Allow a kind of speed control by forcing the autoneg
467         ** advertised speed list to only a certain value, this
468         ** supports 1G on 82599 devices, and 100Mb on x540.
469         */
470         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
471                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
472                         OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
473                         adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
474
475         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
476                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
477                         OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
478                         0, ixgbe_set_thermal_test, "I", "Thermal Test");
479
480         /* Set up the timer callout */
481         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
482
483         /* Determine hardware revision */
484         ixgbe_identify_hardware(adapter);
485
486         /* Do base PCI setup - map BAR0 */
487         if (ixgbe_allocate_pci_resources(adapter)) {
488                 device_printf(dev, "Allocation of PCI resources failed\n");
489                 error = ENXIO;
490                 goto err_out;
491         }
492
493         /* Do descriptor calc and sanity checks */
494         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
495             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
496                 device_printf(dev, "TXD config issue, using default!\n");
497                 adapter->num_tx_desc = DEFAULT_TXD;
498         } else
499                 adapter->num_tx_desc = ixgbe_txd;
500
501         /*
502         ** With many RX rings it is easy to exceed the
503         ** system mbuf allocation. Tuning nmbclusters
504         ** can alleviate this.
505         */
506         if (nmbclusters > 0 ) {
507                 int s;
508                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
509                 if (s > nmbclusters) {
510                         device_printf(dev, "RX Descriptors exceed "
511                             "system mbuf max, using default instead!\n");
512                         ixgbe_rxd = DEFAULT_RXD;
513                 }
514         }
515
516         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
517             ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
518                 device_printf(dev, "RXD config issue, using default!\n");
519                 adapter->num_rx_desc = DEFAULT_RXD;
520         } else
521                 adapter->num_rx_desc = ixgbe_rxd;
522
523         /* Allocate our TX/RX Queues */
524         if (ixgbe_allocate_queues(adapter)) {
525                 error = ENOMEM;
526                 goto err_out;
527         }
528
529         /* Allocate multicast array memory. */
530         adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
531             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
532         if (adapter->mta == NULL) {
533                 device_printf(dev, "Can not allocate multicast setup array\n");
534                 error = ENOMEM;
535                 goto err_late;
536         }
537
538         /* Initialize the shared code */
539         hw->allow_unsupported_sfp = allow_unsupported_sfp;
540         error = ixgbe_init_shared_code(hw);
541         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
542                 /*
543                 ** No optics in this port, set up
544                 ** so the timer routine will probe 
545                 ** for later insertion.
546                 */
547                 adapter->sfp_probe = TRUE;
548                 error = 0;
549         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
550                 device_printf(dev,"Unsupported SFP+ module detected!\n");
551                 error = EIO;
552                 goto err_late;
553         } else if (error) {
554                 device_printf(dev,"Unable to initialize the shared code\n");
555                 error = EIO;
556                 goto err_late;
557         }
558
559         /* Make sure we have a good EEPROM before we read from it */
560         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
561                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
562                 error = EIO;
563                 goto err_late;
564         }
565
566         error = ixgbe_init_hw(hw);
567         switch (error) {
568         case IXGBE_ERR_EEPROM_VERSION:
569                 device_printf(dev, "This device is a pre-production adapter/"
570                     "LOM.  Please be aware there may be issues associated "
571                     "with your hardware.\n If you are experiencing problems "
572                     "please contact your Intel or hardware representative "
573                     "who provided you with this hardware.\n");
574                 break;
575         case IXGBE_ERR_SFP_NOT_SUPPORTED:
576                 device_printf(dev,"Unsupported SFP+ Module\n");
577                 error = EIO;
578                 goto err_late;
579         case IXGBE_ERR_SFP_NOT_PRESENT:
580                 device_printf(dev,"No SFP+ Module found\n");
581                 /* falls thru */
582         default:
583                 break;
584         }
585
586         /* Detect and set physical type */
587         ixgbe_setup_optics(adapter);
588
589         if ((adapter->msix > 1) && (ixgbe_enable_msix))
590                 error = ixgbe_allocate_msix(adapter); 
591         else
592                 error = ixgbe_allocate_legacy(adapter); 
593         if (error) 
594                 goto err_late;
595
596         /* Setup OS specific network interface */
597         if (ixgbe_setup_interface(dev, adapter) != 0)
598                 goto err_late;
599
600         /* Initialize statistics */
601         ixgbe_update_stats_counters(adapter);
602
603         /* Register for VLAN events */
604         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
605             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
606         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
607             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
608
609         /*
610         ** Check PCIE slot type/speed/width
611         */
612         ixgbe_get_slot_info(hw);
613
614         /* Set an initial default flow control value */
615         adapter->fc =  ixgbe_fc_full;
616
617         /* let hardware know driver is loaded */
618         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
619         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
620         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
621
622         ixgbe_add_hw_stats(adapter);
623
624 #ifdef DEV_NETMAP
625         ixgbe_netmap_attach(adapter);
626 #endif /* DEV_NETMAP */
627         INIT_DEBUGOUT("ixgbe_attach: end");
628         return (0);
629 err_late:
630         ixgbe_free_transmit_structures(adapter);
631         ixgbe_free_receive_structures(adapter);
632 err_out:
633         if (adapter->ifp != NULL)
634                 if_free(adapter->ifp);
635         ixgbe_free_pci_resources(adapter);
636         free(adapter->mta, M_DEVBUF);
637         return (error);
638
639 }
640
641 /*********************************************************************
642  *  Device removal routine
643  *
644  *  The detach entry point is called when the driver is being removed.
645  *  This routine stops the adapter and deallocates all the resources
646  *  that were allocated for driver operation.
647  *
648  *  return 0 on success, positive on failure
649  *********************************************************************/
650
651 static int
652 ixgbe_detach(device_t dev)
653 {
654         struct adapter *adapter = device_get_softc(dev);
655         struct ix_queue *que = adapter->queues;
656         struct tx_ring *txr = adapter->tx_rings;
657         u32     ctrl_ext;
658
659         INIT_DEBUGOUT("ixgbe_detach: begin");
660
661         /* Make sure VLANS are not using driver */
662         if (adapter->ifp->if_vlantrunk != NULL) {
663                 device_printf(dev,"Vlan in use, detach first\n");
664                 return (EBUSY);
665         }
666
667         IXGBE_CORE_LOCK(adapter);
668         ixgbe_stop(adapter);
669         IXGBE_CORE_UNLOCK(adapter);
670
671         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
672                 if (que->tq) {
673 #ifndef IXGBE_LEGACY_TX
674                         taskqueue_drain(que->tq, &txr->txq_task);
675 #endif
676                         taskqueue_drain(que->tq, &que->que_task);
677                         taskqueue_free(que->tq);
678                 }
679         }
680
681         /* Drain the Link queue */
682         if (adapter->tq) {
683                 taskqueue_drain(adapter->tq, &adapter->link_task);
684                 taskqueue_drain(adapter->tq, &adapter->mod_task);
685                 taskqueue_drain(adapter->tq, &adapter->msf_task);
686 #ifdef IXGBE_FDIR
687                 taskqueue_drain(adapter->tq, &adapter->fdir_task);
688 #endif
689                 taskqueue_free(adapter->tq);
690         }
691
692         /* let hardware know driver is unloading */
693         ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
694         ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
695         IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
696
697         /* Unregister VLAN events */
698         if (adapter->vlan_attach != NULL)
699                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
700         if (adapter->vlan_detach != NULL)
701                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
702
703         ether_ifdetach(adapter->ifp);
704         callout_drain(&adapter->timer);
705 #ifdef DEV_NETMAP
706         netmap_detach(adapter->ifp);
707 #endif /* DEV_NETMAP */
708         ixgbe_free_pci_resources(adapter);
709         bus_generic_detach(dev);
710         if_free(adapter->ifp);
711
712         ixgbe_free_transmit_structures(adapter);
713         ixgbe_free_receive_structures(adapter);
714         free(adapter->mta, M_DEVBUF);
715
716         IXGBE_CORE_LOCK_DESTROY(adapter);
717         return (0);
718 }
719
720 /*********************************************************************
721  *
722  *  Shutdown entry point
723  *
724  **********************************************************************/
725
726 static int
727 ixgbe_shutdown(device_t dev)
728 {
729         struct adapter *adapter = device_get_softc(dev);
730         IXGBE_CORE_LOCK(adapter);
731         ixgbe_stop(adapter);
732         IXGBE_CORE_UNLOCK(adapter);
733         return (0);
734 }
735
736
737 #ifdef IXGBE_LEGACY_TX
738 /*********************************************************************
739  *  Transmit entry point
740  *
741  *  ixgbe_start is called by the stack to initiate a transmit.
742  *  The driver will remain in this routine as long as there are
743  *  packets to transmit and transmit resources are available.
744  *  In case resources are not available, the stack is notified and
745  *  the packet is requeued.
746  **********************************************************************/
747
748 static void
749 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
750 {
751         struct mbuf    *m_head;
752         struct adapter *adapter = txr->adapter;
753
754         IXGBE_TX_LOCK_ASSERT(txr);
755
756         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
757                 return;
758         if (!adapter->link_active)
759                 return;
760
761         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
762                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
763                         break;
764
765                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
766                 if (m_head == NULL)
767                         break;
768
769                 if (ixgbe_xmit(txr, &m_head)) {
770                         if (m_head != NULL)
771                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
772                         break;
773                 }
774                 /* Send a copy of the frame to the BPF listener */
775                 ETHER_BPF_MTAP(ifp, m_head);
776
777                 /* Set watchdog on */
778                 txr->watchdog_time = ticks;
779                 txr->queue_status = IXGBE_QUEUE_WORKING;
780
781         }
782         return;
783 }
784
785 /*
786  * Legacy TX start - called by the stack, this
787  * always uses the first tx ring, and should
788  * not be used with multiqueue tx enabled.
789  */
790 static void
791 ixgbe_start(struct ifnet *ifp)
792 {
793         struct adapter *adapter = ifp->if_softc;
794         struct tx_ring  *txr = adapter->tx_rings;
795
796         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
797                 IXGBE_TX_LOCK(txr);
798                 ixgbe_start_locked(txr, ifp);
799                 IXGBE_TX_UNLOCK(txr);
800         }
801         return;
802 }
803
804 #else /* ! IXGBE_LEGACY_TX */
805
806 /*
807 ** Multiqueue Transmit driver
808 **
809 */
810 static int
811 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
812 {
813         struct adapter  *adapter = ifp->if_softc;
814         struct ix_queue *que;
815         struct tx_ring  *txr;
816         int             i, err = 0;
817 #ifdef  RSS
818         uint32_t bucket_id;
819 #endif
820
821         /* Which queue to use */
822         /*
823          * When doing RSS, map it to the same outbound queue
824          * as the incoming flow would be mapped to.
825          *
826          * If everything is set up correctly, it should be the
827          * same bucket as the one the current CPU is assigned to.
828          */
829         if ((m->m_flags & M_FLOWID) != 0) {
830 #ifdef  RSS
831                 if (rss_hash2bucket(m->m_pkthdr.flowid,
832                     M_HASHTYPE_GET(m), &bucket_id) == 0) {
833                         /* XXX TODO: spit out something if bucket_id > num_queues? */
834                         i = bucket_id % adapter->num_queues;
835                 } else {
836 #endif
837                         i = m->m_pkthdr.flowid % adapter->num_queues;
838 #ifdef  RSS
839                 }
840 #endif
841         } else {
842                 i = curcpu % adapter->num_queues;
843         }
844
845         txr = &adapter->tx_rings[i];
846         que = &adapter->queues[i];
847
848         err = drbr_enqueue(ifp, txr->br, m);
849         if (err)
850                 return (err);
851         if (IXGBE_TX_TRYLOCK(txr)) {
852                 ixgbe_mq_start_locked(ifp, txr);
853                 IXGBE_TX_UNLOCK(txr);
854         } else
855                 taskqueue_enqueue(que->tq, &txr->txq_task);
856
857         return (0);
858 }
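/*
 * A small worked example of the queue selection above, for a build without
 * the RSS option: with adapter->num_queues = 8, a frame carrying flow ID
 * 0x2A (42) is enqueued on tx ring 42 % 8 = 2, while frames without a
 * flow ID fall back to curcpu % 8.
 */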
859
860 static int
861 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
862 {
863         struct adapter  *adapter = txr->adapter;
864         struct mbuf     *next;
865         int             enqueued = 0, err = 0;
866
867         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
868             adapter->link_active == 0)
869                 return (ENETDOWN);
870
871         /* Process the queue */
872 #if __FreeBSD_version < 901504
873         next = drbr_dequeue(ifp, txr->br);
874         while (next != NULL) {
875                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
876                         if (next != NULL)
877                                 err = drbr_enqueue(ifp, txr->br, next);
878 #else
879         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
880                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
881                         if (next == NULL) {
882                                 drbr_advance(ifp, txr->br);
883                         } else {
884                                 drbr_putback(ifp, txr->br, next);
885                         }
886 #endif
887                         break;
888                 }
889 #if __FreeBSD_version >= 901504
890                 drbr_advance(ifp, txr->br);
891 #endif
892                 enqueued++;
893                 /* Send a copy of the frame to the BPF listener */
894                 ETHER_BPF_MTAP(ifp, next);
895                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
896                         break;
897 #if __FreeBSD_version < 901504
898                 next = drbr_dequeue(ifp, txr->br);
899 #endif
900         }
901
902         if (enqueued > 0) {
903                 /* Set watchdog on */
904                 txr->queue_status = IXGBE_QUEUE_WORKING;
905                 txr->watchdog_time = ticks;
906         }
907
908         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
909                 ixgbe_txeof(txr);
910
911         return (err);
912 }
913
914 /*
915  * Called from a taskqueue to drain queued transmit packets.
916  */
917 static void
918 ixgbe_deferred_mq_start(void *arg, int pending)
919 {
920         struct tx_ring *txr = arg;
921         struct adapter *adapter = txr->adapter;
922         struct ifnet *ifp = adapter->ifp;
923
924         IXGBE_TX_LOCK(txr);
925         if (!drbr_empty(ifp, txr->br))
926                 ixgbe_mq_start_locked(ifp, txr);
927         IXGBE_TX_UNLOCK(txr);
928 }
929
930 /*
931 ** Flush all ring buffers
932 */
933 static void
934 ixgbe_qflush(struct ifnet *ifp)
935 {
936         struct adapter  *adapter = ifp->if_softc;
937         struct tx_ring  *txr = adapter->tx_rings;
938         struct mbuf     *m;
939
940         for (int i = 0; i < adapter->num_queues; i++, txr++) {
941                 IXGBE_TX_LOCK(txr);
942                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
943                         m_freem(m);
944                 IXGBE_TX_UNLOCK(txr);
945         }
946         if_qflush(ifp);
947 }
948 #endif /* IXGBE_LEGACY_TX */
949
950 /*********************************************************************
951  *  Ioctl entry point
952  *
953  *  ixgbe_ioctl is called when the user wants to configure the
954  *  interface.
955  *
956  *  return 0 on success, positive on failure
957  **********************************************************************/
958
959 static int
960 ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
961 {
962         struct adapter  *adapter = ifp->if_softc;
963         struct ixgbe_hw *hw = &adapter->hw;
964         struct ifreq    *ifr = (struct ifreq *) data;
965 #if defined(INET) || defined(INET6)
966         struct ifaddr *ifa = (struct ifaddr *)data;
967         bool            avoid_reset = FALSE;
968 #endif
969         int             error = 0;
970
971         switch (command) {
972
973         case SIOCSIFADDR:
974 #ifdef INET
975                 if (ifa->ifa_addr->sa_family == AF_INET)
976                         avoid_reset = TRUE;
977 #endif
978 #ifdef INET6
979                 if (ifa->ifa_addr->sa_family == AF_INET6)
980                         avoid_reset = TRUE;
981 #endif
982 #if defined(INET) || defined(INET6)
983                 /*
984                 ** Calling init results in link renegotiation,
985                 ** so we avoid doing it when possible.
986                 */
987                 if (avoid_reset) {
988                         ifp->if_flags |= IFF_UP;
989                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
990                                 ixgbe_init(adapter);
991                         if (!(ifp->if_flags & IFF_NOARP))
992                                 arp_ifinit(ifp, ifa);
993                 } else
994                         error = ether_ioctl(ifp, command, data);
995 #endif
996                 break;
997         case SIOCSIFMTU:
998                 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
999                 if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
1000                         error = EINVAL;
1001                 } else {
1002                         IXGBE_CORE_LOCK(adapter);
1003                         ifp->if_mtu = ifr->ifr_mtu;
1004                         adapter->max_frame_size =
1005                                 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1006                         ixgbe_init_locked(adapter);
1007                         IXGBE_CORE_UNLOCK(adapter);
1008                 }
1009                 break;
1010         case SIOCSIFFLAGS:
1011                 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
1012                 IXGBE_CORE_LOCK(adapter);
1013                 if (ifp->if_flags & IFF_UP) {
1014                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1015                                 if ((ifp->if_flags ^ adapter->if_flags) &
1016                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1017                                         ixgbe_set_promisc(adapter);
1018                                 }
1019                         } else
1020                                 ixgbe_init_locked(adapter);
1021                 } else
1022                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1023                                 ixgbe_stop(adapter);
1024                 adapter->if_flags = ifp->if_flags;
1025                 IXGBE_CORE_UNLOCK(adapter);
1026                 break;
1027         case SIOCADDMULTI:
1028         case SIOCDELMULTI:
1029                 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
1030                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1031                         IXGBE_CORE_LOCK(adapter);
1032                         ixgbe_disable_intr(adapter);
1033                         ixgbe_set_multi(adapter);
1034                         ixgbe_enable_intr(adapter);
1035                         IXGBE_CORE_UNLOCK(adapter);
1036                 }
1037                 break;
1038         case SIOCSIFMEDIA:
1039         case SIOCGIFMEDIA:
1040                 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1041                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1042                 break;
1043         case SIOCSIFCAP:
1044         {
1045                 int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1046                 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1047                 if (mask & IFCAP_HWCSUM)
1048                         ifp->if_capenable ^= IFCAP_HWCSUM;
1049                 if (mask & IFCAP_TSO4)
1050                         ifp->if_capenable ^= IFCAP_TSO4;
1051                 if (mask & IFCAP_TSO6)
1052                         ifp->if_capenable ^= IFCAP_TSO6;
1053                 if (mask & IFCAP_LRO)
1054                         ifp->if_capenable ^= IFCAP_LRO;
1055                 if (mask & IFCAP_VLAN_HWTAGGING)
1056                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1057                 if (mask & IFCAP_VLAN_HWFILTER)
1058                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1059                 if (mask & IFCAP_VLAN_HWTSO)
1060                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1061                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1062                         IXGBE_CORE_LOCK(adapter);
1063                         ixgbe_init_locked(adapter);
1064                         IXGBE_CORE_UNLOCK(adapter);
1065                 }
1066                 VLAN_CAPABILITIES(ifp);
1067                 break;
1068         }
1069         case SIOCGI2C:
1070         {
1071                 struct ifi2creq i2c;
1072                 int i;
1073                 IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
1074                 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
1075                 if (error != 0)
1076                         break;
1077                 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
1078                         error = EINVAL;
1079                         break;
1080                 }
1081                 if (i2c.len > sizeof(i2c.data)) {
1082                         error = EINVAL;
1083                         break;
1084                 }
1085
1086                 for (i = 0; i < i2c.len; i++)
1087                         hw->phy.ops.read_i2c_byte(hw, i2c.offset + i,
1088                             i2c.dev_addr, &i2c.data[i]);
1089                 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
1090                 break;
1091         }
1092         default:
1093                 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1094                 error = ether_ioctl(ifp, command, data);
1095                 break;
1096         }
1097
1098         return (error);
1099 }
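/*
 * A minimal userland sketch of exercising the SIOCGI2C case above to read
 * the first bytes of an SFP+ module EEPROM (i2c device address 0xA0).
 * It is an illustration only: error handling and the usual socket/ioctl
 * includes are omitted, and "ix0" is just an example interface name.
 *
 *      struct ifreq ifr;
 *      struct ifi2creq i2c;
 *      int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *      memset(&ifr, 0, sizeof(ifr));
 *      memset(&i2c, 0, sizeof(i2c));
 *      strlcpy(ifr.ifr_name, "ix0", sizeof(ifr.ifr_name));
 *      i2c.dev_addr = 0xA0;
 *      i2c.offset = 0;
 *      i2c.len = sizeof(i2c.data);
 *      ifr.ifr_data = (caddr_t)&i2c;
 *      if (ioctl(s, SIOCGI2C, &ifr) == 0)
 *              printf("module id byte: 0x%02x\n", i2c.data[0]);
 */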
1100
1101 /*********************************************************************
1102  *  Init entry point
1103  *
1104  *  This routine is used in two ways. It is used by the stack as
1105  *  init entry point in network interface structure. It is also used
1106  *  by the driver as a hw/sw initialization routine to get to a
1107  *  consistent state.
1108  *
1109  *  return 0 on success, positive on failure
1110  **********************************************************************/
1111 #define IXGBE_MHADD_MFS_SHIFT 16
1112
1113 static void
1114 ixgbe_init_locked(struct adapter *adapter)
1115 {
1116         struct ifnet   *ifp = adapter->ifp;
1117         device_t        dev = adapter->dev;
1118         struct ixgbe_hw *hw = &adapter->hw;
1119         u32             k, txdctl, mhadd, gpie;
1120         u32             rxdctl, rxctrl;
1121
1122         mtx_assert(&adapter->core_mtx, MA_OWNED);
1123         INIT_DEBUGOUT("ixgbe_init_locked: begin");
1124         hw->adapter_stopped = FALSE;
1125         ixgbe_stop_adapter(hw);
1126         callout_stop(&adapter->timer);
1127
1128         /* reprogram the RAR[0] in case user changed it. */
1129         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1130
1131         /* Get the latest mac address, User can use a LAA */
1132         bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
1133               IXGBE_ETH_LENGTH_OF_ADDRESS);
1134         ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1135         hw->addr_ctrl.rar_used_count = 1;
1136
1137         /* Set the various hardware offload abilities */
1138         ifp->if_hwassist = 0;
1139         if (ifp->if_capenable & IFCAP_TSO)
1140                 ifp->if_hwassist |= CSUM_TSO;
1141         if (ifp->if_capenable & IFCAP_TXCSUM) {
1142                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1143 #if __FreeBSD_version >= 800000
1144                 if (hw->mac.type != ixgbe_mac_82598EB)
1145                         ifp->if_hwassist |= CSUM_SCTP;
1146 #endif
1147         }
1148
1149         /* Prepare transmit descriptors and buffers */
1150         if (ixgbe_setup_transmit_structures(adapter)) {
1151                 device_printf(dev,"Could not setup transmit structures\n");
1152                 ixgbe_stop(adapter);
1153                 return;
1154         }
1155
1156         ixgbe_init_hw(hw);
1157         ixgbe_initialize_transmit_units(adapter);
1158
1159         /* Setup Multicast table */
1160         ixgbe_set_multi(adapter);
1161
1162         /*
1163         ** Determine the correct mbuf pool
1164         ** for doing jumbo frames
1165         */
1166         if (adapter->max_frame_size <= 2048)
1167                 adapter->rx_mbuf_sz = MCLBYTES;
1168         else if (adapter->max_frame_size <= 4096)
1169                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1170         else if (adapter->max_frame_size <= 9216)
1171                 adapter->rx_mbuf_sz = MJUM9BYTES;
1172         else
1173                 adapter->rx_mbuf_sz = MJUM16BYTES;
1174
1175         /* Prepare receive descriptors and buffers */
1176         if (ixgbe_setup_receive_structures(adapter)) {
1177                 device_printf(dev,"Could not setup receive structures\n");
1178                 ixgbe_stop(adapter);
1179                 return;
1180         }
1181
1182         /* Configure RX settings */
1183         ixgbe_initialize_receive_units(adapter);
1184
1185         gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1186
1187         /* Enable Fan Failure Interrupt */
1188         gpie |= IXGBE_SDP1_GPIEN;
1189
1190         /* Add for Module detection */
1191         if (hw->mac.type == ixgbe_mac_82599EB)
1192                 gpie |= IXGBE_SDP2_GPIEN;
1193
1194         /* Thermal Failure Detection */
1195         if (hw->mac.type == ixgbe_mac_X540)
1196                 gpie |= IXGBE_SDP0_GPIEN;
1197
1198         if (adapter->msix > 1) {
1199                 /* Enable Enhanced MSIX mode */
1200                 gpie |= IXGBE_GPIE_MSIX_MODE;
1201                 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1202                     IXGBE_GPIE_OCD;
1203         }
1204         IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1205
1206         /* Set MTU size */
1207         if (ifp->if_mtu > ETHERMTU) {
1208                 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1209                 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1210                 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1211                 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1212         }
1213         
1214         /* Now enable all the queues */
1215
1216         for (int i = 0; i < adapter->num_queues; i++) {
1217                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1218                 txdctl |= IXGBE_TXDCTL_ENABLE;
1219                 /* Set WTHRESH to 8, burst writeback */
1220                 txdctl |= (8 << 16);
1221                 /*
1222                  * When the internal queue falls below PTHRESH (32),
1223                  * start prefetching as long as there are at least
1224                  * HTHRESH (1) buffers ready. The values are taken
1225                  * from the Intel linux driver 3.8.21.
1226                  * Prefetching enables tx line rate even with 1 queue.
1227                  */
1228                 txdctl |= (32 << 0) | (1 << 8);
1229                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1230         }
1231
1232         for (int i = 0; i < adapter->num_queues; i++) {
1233                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1234                 if (hw->mac.type == ixgbe_mac_82598EB) {
1235                         /*
1236                         ** PTHRESH = 21
1237                         ** HTHRESH = 4
1238                         ** WTHRESH = 8
1239                         */
1240                         rxdctl &= ~0x3FFFFF;
1241                         rxdctl |= 0x080420;
1242                 }
1243                 rxdctl |= IXGBE_RXDCTL_ENABLE;
1244                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1245                 for (k = 0; k < 10; k++) {
1246                         if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1247                             IXGBE_RXDCTL_ENABLE)
1248                                 break;
1249                         else
1250                                 msec_delay(1);
1251                 }
1252                 wmb();
1253 #ifdef DEV_NETMAP
1254                 /*
1255                  * In netmap mode, we must preserve the buffers made
1256                  * available to userspace before the if_init()
1257                  * (this is true by default on the TX side, because
1258                  * init makes all buffers available to userspace).
1259                  *
1260                  * netmap_reset() and the device specific routines
1261                  * (e.g. ixgbe_setup_receive_rings()) map these
1262                  * buffers at the end of the NIC ring, so here we
1263                  * must set the RDT (tail) register to make sure
1264                  * they are not overwritten.
1265                  *
1266                  * In this driver the NIC ring starts at RDH = 0,
1267                  * RDT points to the last slot available for reception (?),
1268                  * so RDT = num_rx_desc - 1 means the whole ring is available.
1269                  */
1270                 if (ifp->if_capenable & IFCAP_NETMAP) {
1271                         struct netmap_adapter *na = NA(adapter->ifp);
1272                         struct netmap_kring *kring = &na->rx_rings[i];
1273                         int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
1274
1275                         IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1276                 } else
1277 #endif /* DEV_NETMAP */
1278                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1279         }
1280
1281         /* Enable Receive engine */
1282         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1283         if (hw->mac.type == ixgbe_mac_82598EB)
1284                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1285         rxctrl |= IXGBE_RXCTRL_RXEN;
1286         ixgbe_enable_rx_dma(hw, rxctrl);
1287
1288         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1289
1290         /* Set up MSI/X routing */
1291         if (ixgbe_enable_msix)  {
1292                 ixgbe_configure_ivars(adapter);
1293                 /* Set up auto-mask */
1294                 if (hw->mac.type == ixgbe_mac_82598EB)
1295                         IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1296                 else {
1297                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1298                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1299                 }
1300         } else {  /* Simple settings for Legacy/MSI */
1301                 ixgbe_set_ivar(adapter, 0, 0, 0);
1302                 ixgbe_set_ivar(adapter, 0, 0, 1);
1303                 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1304         }
1305
1306 #ifdef IXGBE_FDIR
1307         /* Init Flow director */
1308         if (hw->mac.type != ixgbe_mac_82598EB) {
1309                 u32 hdrm = 32 << fdir_pballoc;
1310
1311                 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1312                 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1313         }
1314 #endif
1315
1316         /*
1317         ** Check on any SFP devices that
1318         ** need to be kick-started
1319         */
1320         if (hw->phy.type == ixgbe_phy_none) {
1321                 int err = hw->phy.ops.identify(hw);
1322                 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1323                         device_printf(dev,
1324                             "Unsupported SFP+ module type was detected.\n");
1325                         return;
1326                 }
1327         }
1328
1329         /* Set moderation on the Link interrupt */
1330         IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1331
1332         /* Config/Enable Link */
1333         ixgbe_config_link(adapter);
1334
1335         /* Hardware Packet Buffer & Flow Control setup */
1336         {
1337                 u32 rxpb, frame, size, tmp;
1338
1339                 frame = adapter->max_frame_size;
1340
1341                 /* Calculate High Water */
1342                 if (hw->mac.type == ixgbe_mac_X540)
1343                         tmp = IXGBE_DV_X540(frame, frame);
1344                 else
1345                         tmp = IXGBE_DV(frame, frame);
1346                 size = IXGBE_BT2KB(tmp);
1347                 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1348                 hw->fc.high_water[0] = rxpb - size;
1349
1350                 /* Now calculate Low Water */
1351                 if (hw->mac.type == ixgbe_mac_X540)
1352                         tmp = IXGBE_LOW_DV_X540(frame);
1353                 else
1354                         tmp = IXGBE_LOW_DV(frame);
1355                 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1356                 
1357                 hw->fc.requested_mode = adapter->fc;
1358                 hw->fc.pause_time = IXGBE_FC_PAUSE;
1359                 hw->fc.send_xon = TRUE;
1360         }
1361         /* Initialize the FC settings */
1362         ixgbe_start_hw(hw);
1363
1364         /* Set up VLAN support and filter */
1365         ixgbe_setup_vlan_hw_support(adapter);
1366
1367         /* And now turn on interrupts */
1368         ixgbe_enable_intr(adapter);
1369
1370         /* Now inform the stack we're ready */
1371         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1372
1373         return;
1374 }
1375
1376 static void
1377 ixgbe_init(void *arg)
1378 {
1379         struct adapter *adapter = arg;
1380
1381         IXGBE_CORE_LOCK(adapter);
1382         ixgbe_init_locked(adapter);
1383         IXGBE_CORE_UNLOCK(adapter);
1384         return;
1385 }
1386
1387
1388 /*
1389 **
1390 ** MSIX Interrupt Handlers and Tasklets
1391 **
1392 */
1393
1394 static inline void
1395 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1396 {
1397         struct ixgbe_hw *hw = &adapter->hw;
1398         u64     queue = ((u64)1 << vector);
1399         u32     mask;
1400
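        /*
        ** The 82598 has a single 32-bit EIMS register, while later
        ** MACs split the per-queue enable bits across two EIMS_EX
        ** registers, so the 64-bit queue mask is written in halves.
        */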
1401         if (hw->mac.type == ixgbe_mac_82598EB) {
1402                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1403                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1404         } else {
1405                 mask = (queue & 0xFFFFFFFF);
1406                 if (mask)
1407                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1408                 mask = (queue >> 32);
1409                 if (mask)
1410                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1411         }
1412 }
1413
1414 static inline void
1415 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1416 {
1417         struct ixgbe_hw *hw = &adapter->hw;
1418         u64     queue = ((u64)1 << vector);
1419         u32     mask;
1420
1421         if (hw->mac.type == ixgbe_mac_82598EB) {
1422                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1423                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1424         } else {
1425                 mask = (queue & 0xFFFFFFFF);
1426                 if (mask)
1427                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1428                 mask = (queue >> 32);
1429                 if (mask)
1430                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1431         }
1432 }
1433
1434 static void
1435 ixgbe_handle_que(void *context, int pending)
1436 {
1437         struct ix_queue *que = context;
1438         struct adapter  *adapter = que->adapter;
1439         struct tx_ring  *txr = que->txr;
1440         struct ifnet    *ifp = adapter->ifp;
1441         bool            more;
1442
1443         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1444                 more = ixgbe_rxeof(que);
1445                 IXGBE_TX_LOCK(txr);
1446                 ixgbe_txeof(txr);
1447 #ifndef IXGBE_LEGACY_TX
1448                 if (!drbr_empty(ifp, txr->br))
1449                         ixgbe_mq_start_locked(ifp, txr);
1450 #else
1451                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1452                         ixgbe_start_locked(txr, ifp);
1453 #endif
1454                 IXGBE_TX_UNLOCK(txr);
1455         }
1456
1457         /* Reenable this interrupt */
1458         if (que->res != NULL)
1459                 ixgbe_enable_queue(adapter, que->msix);
1460         else
1461                 ixgbe_enable_intr(adapter);
1462         return;
1463 }
1464
1465
1466 /*********************************************************************
1467  *
1468  *  Legacy Interrupt Service routine
1469  *
1470  **********************************************************************/
1471
1472 static void
1473 ixgbe_legacy_irq(void *arg)
1474 {
1475         struct ix_queue *que = arg;
1476         struct adapter  *adapter = que->adapter;
1477         struct ixgbe_hw *hw = &adapter->hw;
1478         struct ifnet    *ifp = adapter->ifp;
1479         struct          tx_ring *txr = adapter->tx_rings;
1480         bool            more;
1481         u32             reg_eicr;
1482
1483
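        /*
        ** In this legacy/MSI path the EICR read is expected to both
        ** return and clear the asserted cause bits (read-to-clear).
        */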
1484         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1485
1486         ++que->irqs;
1487         if (reg_eicr == 0) {
1488                 ixgbe_enable_intr(adapter);
1489                 return;
1490         }
1491
1492         more = ixgbe_rxeof(que);
1493
1494         IXGBE_TX_LOCK(txr);
1495         ixgbe_txeof(txr);
1496 #ifdef IXGBE_LEGACY_TX
1497         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1498                 ixgbe_start_locked(txr, ifp);
1499 #else
1500         if (!drbr_empty(ifp, txr->br))
1501                 ixgbe_mq_start_locked(ifp, txr);
1502 #endif
1503         IXGBE_TX_UNLOCK(txr);
1504
1505         /* Check for fan failure */
1506         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1507             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1508                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1509                     "REPLACE IMMEDIATELY!!\n");
1510                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1511         }
1512
1513         /* Link status change */
1514         if (reg_eicr & IXGBE_EICR_LSC)
1515                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1516
1517         if (more)
1518                 taskqueue_enqueue(que->tq, &que->que_task);
1519         else
1520                 ixgbe_enable_intr(adapter);
1521         return;
1522 }
1523
1524
1525 /*********************************************************************
1526  *
1527  *  MSIX Queue Interrupt Service routine
1528  *
1529  **********************************************************************/
1530 void
1531 ixgbe_msix_que(void *arg)
1532 {
1533         struct ix_queue *que = arg;
1534         struct adapter  *adapter = que->adapter;
1535         struct ifnet    *ifp = adapter->ifp;
1536         struct tx_ring  *txr = que->txr;
1537         struct rx_ring  *rxr = que->rxr;
1538         bool            more;
1539         u32             newitr = 0;
1540
1541         /* Protect against spurious interrupts */
1542         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1543                 return;
1544
1545         ixgbe_disable_queue(adapter, que->msix);
1546         ++que->irqs;
1547
1548         more = ixgbe_rxeof(que);
1549
1550         IXGBE_TX_LOCK(txr);
1551         ixgbe_txeof(txr);
1552 #ifdef IXGBE_LEGACY_TX
1553         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1554                 ixgbe_start_locked(txr, ifp);
1555 #else
1556         if (!drbr_empty(ifp, txr->br))
1557                 ixgbe_mq_start_locked(ifp, txr);
1558 #endif
1559         IXGBE_TX_UNLOCK(txr);
1560
1561         /* Do AIM now? */
1562
1563         if (ixgbe_enable_aim == FALSE)
1564                 goto no_calc;
1565         /*
1566         ** Do Adaptive Interrupt Moderation:
1567         **  - Write out last calculated setting
1568         **  - Calculate based on average size over
1569         **    the last interval.
1570         */
1571         if (que->eitr_setting)
1572                 IXGBE_WRITE_REG(&adapter->hw,
1573                     IXGBE_EITR(que->msix), que->eitr_setting);
1574  
1575         que->eitr_setting = 0;
1576
1577         /* Idle, do nothing */
1578         if ((txr->bytes == 0) && (rxr->bytes == 0))
1579                 goto no_calc;
1580                                 
1581         if ((txr->bytes) && (txr->packets))
1582                 newitr = txr->bytes/txr->packets;
1583         if ((rxr->bytes) && (rxr->packets))
1584                 newitr = max(newitr,
1585                     (rxr->bytes / rxr->packets));
1586         newitr += 24; /* account for hardware frame, crc */
1587
1588         /* set an upper boundary */
1589         newitr = min(newitr, 3000);
1590
1591         /* Be nice to the mid range */
1592         if ((newitr > 300) && (newitr < 1200))
1593                 newitr = (newitr / 3);
1594         else
1595                 newitr = (newitr / 2);
1596
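        /*
        ** The 82598 wants the interval replicated into both halves of
        ** EITR; on later MACs the CNT_WDIS bit is understood to keep
        ** this write from also resetting the running interrupt counter.
        */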
1597         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1598                 newitr |= newitr << 16;
1599         else
1600                 newitr |= IXGBE_EITR_CNT_WDIS;
1601                  
1602         /* save for next interrupt */
1603         que->eitr_setting = newitr;
1604
1605         /* Reset state */
1606         txr->bytes = 0;
1607         txr->packets = 0;
1608         rxr->bytes = 0;
1609         rxr->packets = 0;
1610
1611 no_calc:
1612         if (more)
1613                 taskqueue_enqueue(que->tq, &que->que_task);
1614         else
1615                 ixgbe_enable_queue(adapter, que->msix);
1616         return;
1617 }
1618
1619
1620 static void
1621 ixgbe_msix_link(void *arg)
1622 {
1623         struct adapter  *adapter = arg;
1624         struct ixgbe_hw *hw = &adapter->hw;
1625         u32             reg_eicr;
1626
1627         ++adapter->link_irq;
1628
1629         /* First get the cause */
1630         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1631         /* Be sure the queue bits are not cleared */
1632         reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1633         /* Clear interrupt with write */
1634         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1635
1636         /* Link status change */
1637         if (reg_eicr & IXGBE_EICR_LSC)
1638                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1639
1640         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1641 #ifdef IXGBE_FDIR
1642                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1643                         /* This is probably overkill :) */
1644                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1645                                 return;
1646                         /* Disable the interrupt */
1647                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1648                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1649                 } else
1650 #endif
1651                 if (reg_eicr & IXGBE_EICR_ECC) {
1652                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1653                             "Please Reboot!!\n");
1654                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1655                 } else
1656
1657                 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1658                         /* Clear the interrupt */
1659                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1660                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1661                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1662                         /* Clear the interrupt */
1663                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1664                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1665                 }
1666         } 
1667
1668         /* Check for fan failure */
1669         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1670             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1671                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1672                     "REPLACE IMMEDIATELY!!\n");
1673                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1674         }
1675
1676         /* Check for over temp condition */
1677         if ((hw->mac.type == ixgbe_mac_X540) &&
1678             (reg_eicr & IXGBE_EICR_TS)) {
1679                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1680                     "PHY IS SHUT DOWN!!\n");
1681                 device_printf(adapter->dev, "System shutdown required\n");
1682                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1683         }
1684
1685         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1686         return;
1687 }
1688
1689 /*********************************************************************
1690  *
1691  *  Media Ioctl callback
1692  *
1693  *  This routine is called whenever the user queries the status of
1694  *  the interface using ifconfig.
1695  *
1696  **********************************************************************/
1697 static void
1698 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1699 {
1700         struct adapter *adapter = ifp->if_softc;
1701
1702         INIT_DEBUGOUT("ixgbe_media_status: begin");
1703         IXGBE_CORE_LOCK(adapter);
1704         ixgbe_update_link_status(adapter);
1705
1706         ifmr->ifm_status = IFM_AVALID;
1707         ifmr->ifm_active = IFM_ETHER;
1708
1709         if (!adapter->link_active) {
1710                 IXGBE_CORE_UNLOCK(adapter);
1711                 return;
1712         }
1713
1714         ifmr->ifm_status |= IFM_ACTIVE;
1715
1716         switch (adapter->link_speed) {
1717                 case IXGBE_LINK_SPEED_100_FULL:
1718                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1719                         break;
1720                 case IXGBE_LINK_SPEED_1GB_FULL:
1721                         ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1722                         break;
1723                 case IXGBE_LINK_SPEED_10GB_FULL:
1724                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1725                         break;
1726         }
1727
1728         IXGBE_CORE_UNLOCK(adapter);
1729
1730         return;
1731 }
1732
1733 /*********************************************************************
1734  *
1735  *  Media Ioctl callback
1736  *
1737  *  This routine is called when the user changes speed/duplex using
1738  *  media/mediaopt option with ifconfig.
1739  *
1740  **********************************************************************/
1741 static int
1742 ixgbe_media_change(struct ifnet * ifp)
1743 {
1744         struct adapter *adapter = ifp->if_softc;
1745         struct ifmedia *ifm = &adapter->media;
1746
1747         INIT_DEBUGOUT("ixgbe_media_change: begin");
1748
1749         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1750                 return (EINVAL);
1751
1752         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1753         case IFM_AUTO:
1754                 adapter->hw.phy.autoneg_advertised =
1755                     IXGBE_LINK_SPEED_100_FULL |
1756                     IXGBE_LINK_SPEED_1GB_FULL |
1757                     IXGBE_LINK_SPEED_10GB_FULL;
1758                 break;
1759         default:
1760                 device_printf(adapter->dev, "Only auto media type\n");
1761                 return (EINVAL);
1762         }
1763
1764         return (0);
1765 }
1766
1767 /*********************************************************************
1768  *
1769  *  This routine maps the mbufs to tx descriptors, allowing the
1770  *  TX engine to transmit the packets. 
1771  *      - return 0 on success, positive on failure
1772  *
1773  **********************************************************************/
1774
1775 static int
1776 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1777 {
1778         struct adapter  *adapter = txr->adapter;
1779         u32             olinfo_status = 0, cmd_type_len;
1780         int             i, j, error, nsegs;
1781         int             first;
1782         bool            remap = TRUE;
1783         struct mbuf     *m_head;
1784         bus_dma_segment_t segs[adapter->num_segs];
1785         bus_dmamap_t    map;
1786         struct ixgbe_tx_buf *txbuf;
1787         union ixgbe_adv_tx_desc *txd = NULL;
1788
1789         m_head = *m_headp;
1790
1791         /* Basic descriptor defines */
1792         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1793             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1794
1795         if (m_head->m_flags & M_VLANTAG)
1796                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1797
1798         /*
1799          * Important to capture the first descriptor
1800          * used because it will contain the index of
1801          * the one we tell the hardware to report back
1802          */
1803         first = txr->next_avail_desc;
1804         txbuf = &txr->tx_buffers[first];
1805         map = txbuf->map;
1806
1807         /*
1808          * Map the packet for DMA.
1809          */
1810 retry:
1811         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1812             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1813
1814         if (__predict_false(error)) {
1815                 struct mbuf *m;
1816
1817                 switch (error) {
1818                 case EFBIG:
1819                         /* Try it again? - one try */
1820                         if (remap == TRUE) {
1821                                 remap = FALSE;
1822                                 m = m_defrag(*m_headp, M_NOWAIT);
1823                                 if (m == NULL) {
1824                                         adapter->mbuf_defrag_failed++;
1825                                         m_freem(*m_headp);
1826                                         *m_headp = NULL;
1827                                         return (ENOBUFS);
1828                                 }
1829                                 *m_headp = m;
1830                                 goto retry;
1831                         } else
1832                                 return (error);
1833                 case ENOMEM:
1834                         txr->no_tx_dma_setup++;
1835                         return (error);
1836                 default:
1837                         txr->no_tx_dma_setup++;
1838                         m_freem(*m_headp);
1839                         *m_headp = NULL;
1840                         return (error);
1841                 }
1842         }
1843
1844         /* Make certain there are enough descriptors */
1845         if (nsegs > txr->tx_avail - 2) {
1846                 txr->no_desc_avail++;
1847                 bus_dmamap_unload(txr->txtag, map);
1848                 return (ENOBUFS);
1849         }
1850         m_head = *m_headp;
1851
1852         /*
1853         ** Set up the appropriate offload context
1854         ** this will consume the first descriptor
1855         */
1856         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1857         if (__predict_false(error)) {
1858                 if (error == ENOBUFS)
1859                         *m_headp = NULL;
1860                 return (error);
1861         }
1862
1863 #ifdef IXGBE_FDIR
1864         /* Do the flow director magic */
1865         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1866                 ++txr->atr_count;
1867                 if (txr->atr_count >= atr_sample_rate) {
1868                         ixgbe_atr(txr, m_head);
1869                         txr->atr_count = 0;
1870                 }
1871         }
1872 #endif
1873
1874         i = txr->next_avail_desc;
1875         for (j = 0; j < nsegs; j++) {
1876                 bus_size_t seglen;
1877                 bus_addr_t segaddr;
1878
1879                 txbuf = &txr->tx_buffers[i];
1880                 txd = &txr->tx_base[i];
1881                 seglen = segs[j].ds_len;
1882                 segaddr = htole64(segs[j].ds_addr);
1883
1884                 txd->read.buffer_addr = segaddr;
1885                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1886             cmd_type_len | seglen);
1887                 txd->read.olinfo_status = htole32(olinfo_status);
1888
1889                 if (++i == txr->num_desc)
1890                         i = 0;
1891         }
1892
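        /*
        ** Mark the last descriptor of the frame with EOP (end of
        ** packet) and RS (report status) so completion is written
        ** back on this descriptor.
        */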
1893         txd->read.cmd_type_len |=
1894             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1895         txr->tx_avail -= nsegs;
1896         txr->next_avail_desc = i;
1897
1898         txbuf->m_head = m_head;
1899         /*
1900         ** Here we swap the map so the last descriptor,
1901         ** which gets the completion interrupt has the
1902         ** real map, and the first descriptor gets the
1903         ** unused map from this descriptor.
1904         */
1905         txr->tx_buffers[first].map = txbuf->map;
1906         txbuf->map = map;
1907         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1908
1909         /* Set the EOP descriptor that will be marked done */
1910         txbuf = &txr->tx_buffers[first];
1911         txbuf->eop = txd;
1912
1913         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1914             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1915         /*
1916          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1917          * hardware that this frame is available to transmit.
1918          */
1919         ++txr->total_packets;
1920         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1921
1922         return (0);
1923
1924 }
1925
1926 static void
1927 ixgbe_set_promisc(struct adapter *adapter)
1928 {
1929         u_int32_t       reg_rctl;
1930         struct ifnet   *ifp = adapter->ifp;
1931         int             mcnt = 0;
1932
1933         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1934         reg_rctl &= (~IXGBE_FCTRL_UPE);
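        /*
        ** Count the multicast addresses so MPE is only left set
        ** below when the list would overflow the hardware filter.
        */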
1935         if (ifp->if_flags & IFF_ALLMULTI)
1936                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1937         else {
1938                 struct  ifmultiaddr *ifma;
1939 #if __FreeBSD_version < 800000
1940                 IF_ADDR_LOCK(ifp);
1941 #else
1942                 if_maddr_rlock(ifp);
1943 #endif
1944                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1945                         if (ifma->ifma_addr->sa_family != AF_LINK)
1946                                 continue;
1947                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1948                                 break;
1949                         mcnt++;
1950                 }
1951 #if __FreeBSD_version < 800000
1952                 IF_ADDR_UNLOCK(ifp);
1953 #else
1954                 if_maddr_runlock(ifp);
1955 #endif
1956         }
1957         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1958                 reg_rctl &= (~IXGBE_FCTRL_MPE);
1959         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1960
1961         if (ifp->if_flags & IFF_PROMISC) {
1962                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1963                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1964         } else if (ifp->if_flags & IFF_ALLMULTI) {
1965                 reg_rctl |= IXGBE_FCTRL_MPE;
1966                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1967                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1968         }
1969         return;
1970 }
1971
1972
1973 /*********************************************************************
1974  *  Multicast Update
1975  *
1976  *  This routine is called whenever multicast address list is updated.
1977  *
1978  **********************************************************************/
1979 #define IXGBE_RAR_ENTRIES 16
1980
1981 static void
1982 ixgbe_set_multi(struct adapter *adapter)
1983 {
1984         u32     fctrl;
1985         u8      *mta;
1986         u8      *update_ptr;
1987         struct  ifmultiaddr *ifma;
1988         int     mcnt = 0;
1989         struct ifnet   *ifp = adapter->ifp;
1990
1991         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1992
1993         mta = adapter->mta;
1994         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1995             MAX_NUM_MULTICAST_ADDRESSES);
1996
1997 #if __FreeBSD_version < 800000
1998         IF_ADDR_LOCK(ifp);
1999 #else
2000         if_maddr_rlock(ifp);
2001 #endif
2002         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2003                 if (ifma->ifma_addr->sa_family != AF_LINK)
2004                         continue;
2005                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2006                         break;
2007                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
2008                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
2009                     IXGBE_ETH_LENGTH_OF_ADDRESS);
2010                 mcnt++;
2011         }
2012 #if __FreeBSD_version < 800000
2013         IF_ADDR_UNLOCK(ifp);
2014 #else
2015         if_maddr_runlock(ifp);
2016 #endif
2017
2018         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2019         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2020         if (ifp->if_flags & IFF_PROMISC)
2021                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2022         else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
2023             ifp->if_flags & IFF_ALLMULTI) {
2024                 fctrl |= IXGBE_FCTRL_MPE;
2025                 fctrl &= ~IXGBE_FCTRL_UPE;
2026         } else
2027                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2028         
2029         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2030
2031         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2032                 update_ptr = mta;
2033                 ixgbe_update_mc_addr_list(&adapter->hw,
2034                     update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2035         }
2036
2037         return;
2038 }
2039
2040 /*
2041  * This is an iterator function needed by the multicast
2042  * shared code. It simply feeds the shared code routine the
2043  * addresses in the mta array built by ixgbe_set_multi(), one by one.
2044  */
2045 static u8 *
2046 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2047 {
2048         u8 *addr = *update_ptr;
2049         u8 *newptr;
2050         *vmdq = 0;
2051
2052         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2053         *update_ptr = newptr;
2054         return addr;
2055 }
2056
2057
2058 /*********************************************************************
2059  *  Timer routine
2060  *
2061  *  This routine checks for link status, updates statistics,
2062  *  and runs the watchdog check.
2063  *
2064  **********************************************************************/
2065
2066 static void
2067 ixgbe_local_timer(void *arg)
2068 {
2069         struct adapter  *adapter = arg;
2070         device_t        dev = adapter->dev;
2071         struct ix_queue *que = adapter->queues;
2072         struct tx_ring  *txr = adapter->tx_rings;
2073         int             hung = 0, paused = 0;
2074
2075         mtx_assert(&adapter->core_mtx, MA_OWNED);
2076
2077         /* Check for pluggable optics */
2078         if (adapter->sfp_probe)
2079                 if (!ixgbe_sfp_probe(adapter))
2080                         goto out; /* Nothing to do */
2081
2082         ixgbe_update_link_status(adapter);
2083         ixgbe_update_stats_counters(adapter);
2084
2085         /*
2086          * If the interface has been paused
2087          * then don't do the watchdog check
2088          */
2089         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2090                 paused = 1;
2091
2092         /*
2093         ** Check the TX queues status
2094         **      - watchdog only if all queues show hung
2095         */          
2096         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2097                 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2098                     (paused == 0))
2099                         ++hung;
2100                 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2101                         taskqueue_enqueue(que->tq, &txr->txq_task);
2102         }
2103         /* Only truly watchdog if all queues show hung */
2104         if (hung == adapter->num_queues)
2105                 goto watchdog;
2106
2107 out:
2108         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2109         return;
2110
2111 watchdog:
2112         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2113         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2114             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2115             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2116         device_printf(dev,"TX(%d) desc avail = %d,"
2117             "Next TX to Clean = %d\n",
2118             txr->me, txr->tx_avail, txr->next_to_clean);
2119         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2120         adapter->watchdog_events++;
2121         ixgbe_init_locked(adapter);
2122 }
2123
2124 /*
2125 ** Note: this routine updates the OS on the link state;
2126 **      the real check of the hardware only happens with
2127 **      a link interrupt.
2128 */
2129 static void
2130 ixgbe_update_link_status(struct adapter *adapter)
2131 {
2132         struct ifnet    *ifp = adapter->ifp;
2133         device_t dev = adapter->dev;
2134
2135
2136         if (adapter->link_up){ 
2137                 if (adapter->link_active == FALSE) {
2138                         if (bootverbose)
2139                                 device_printf(dev,"Link is up %d Gbps %s \n",
2140                                     ((adapter->link_speed == 128)? 10:1),
2141                                     "Full Duplex");
2142                         adapter->link_active = TRUE;
2143                         /* Update any Flow Control changes */
2144                         ixgbe_fc_enable(&adapter->hw);
2145                         if_link_state_change(ifp, LINK_STATE_UP);
2146                 }
2147         } else { /* Link down */
2148                 if (adapter->link_active == TRUE) {
2149                         if (bootverbose)
2150                                 device_printf(dev,"Link is Down\n");
2151                         if_link_state_change(ifp, LINK_STATE_DOWN);
2152                         adapter->link_active = FALSE;
2153                 }
2154         }
2155
2156         return;
2157 }
2158
2159
2160 /*********************************************************************
2161  *
2162  *  This routine disables all traffic on the adapter by issuing a
2163  *  global reset on the MAC and deallocates TX/RX buffers.
2164  *
2165  **********************************************************************/
2166
2167 static void
2168 ixgbe_stop(void *arg)
2169 {
2170         struct ifnet   *ifp;
2171         struct adapter *adapter = arg;
2172         struct ixgbe_hw *hw = &adapter->hw;
2173         ifp = adapter->ifp;
2174
2175         mtx_assert(&adapter->core_mtx, MA_OWNED);
2176
2177         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2178         ixgbe_disable_intr(adapter);
2179         callout_stop(&adapter->timer);
2180
2181         /* Let the stack know...*/
2182         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2183
2184         ixgbe_reset_hw(hw);
2185         hw->adapter_stopped = FALSE;
2186         ixgbe_stop_adapter(hw);
2187         if (hw->mac.type == ixgbe_mac_82599EB)
2188                 ixgbe_stop_mac_link_on_d3_82599(hw);
2189         /* Turn off the laser - noop with no optics */
2190         ixgbe_disable_tx_laser(hw);
2191
2192         /* Update the stack */
2193         adapter->link_up = FALSE;
2194         ixgbe_update_link_status(adapter);
2195
2196         /* reprogram the RAR[0] in case user changed it. */
2197         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2198
2199         return;
2200 }
2201
2202
2203 /*********************************************************************
2204  *
2205  *  Determine hardware revision.
2206  *
2207  **********************************************************************/
2208 static void
2209 ixgbe_identify_hardware(struct adapter *adapter)
2210 {
2211         device_t        dev = adapter->dev;
2212         struct ixgbe_hw *hw = &adapter->hw;
2213
2214         /* Save off the information about this board */
2215         hw->vendor_id = pci_get_vendor(dev);
2216         hw->device_id = pci_get_device(dev);
2217         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2218         hw->subsystem_vendor_id =
2219             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2220         hw->subsystem_device_id =
2221             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2222
2223         /* We need this here to set the num_segs below */
2224         ixgbe_set_mac_type(hw);
2225
2226         /* Pick up the 82599 and VF settings */
2227         if (hw->mac.type != ixgbe_mac_82598EB) {
2228                 hw->phy.smart_speed = ixgbe_smart_speed;
2229                 adapter->num_segs = IXGBE_82599_SCATTER;
2230         } else
2231                 adapter->num_segs = IXGBE_82598_SCATTER;
2232
2233         return;
2234 }
2235
2236 /*********************************************************************
2237  *
2238  *  Determine optic type
2239  *
2240  **********************************************************************/
2241 static void
2242 ixgbe_setup_optics(struct adapter *adapter)
2243 {
2244         struct ixgbe_hw *hw = &adapter->hw;
2245         int             layer;
2246
2247         layer = ixgbe_get_supported_physical_layer(hw);
2248
2249         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2250                 adapter->optics = IFM_10G_T;
2251                 return;
2252         }
2253
2254         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2255                 adapter->optics = IFM_1000_T;
2256                 return;
2257         }
2258
2259         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2260                 adapter->optics = IFM_1000_SX;
2261                 return;
2262         }
2263
2264         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2265             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2266                 adapter->optics = IFM_10G_LR;
2267                 return;
2268         }
2269
2270         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2271                 adapter->optics = IFM_10G_SR;
2272                 return;
2273         }
2274
2275         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2276                 adapter->optics = IFM_10G_TWINAX;
2277                 return;
2278         }
2279
2280         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2281             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2282                 adapter->optics = IFM_10G_CX4;
2283                 return;
2284         }
2285
2286         /* If we get here just set the default */
2287         adapter->optics = IFM_ETHER | IFM_AUTO;
2288         return;
2289 }
2290
2291 /*********************************************************************
2292  *
2293  *  Setup the Legacy or MSI Interrupt handler
2294  *
2295  **********************************************************************/
2296 static int
2297 ixgbe_allocate_legacy(struct adapter *adapter)
2298 {
2299         device_t        dev = adapter->dev;
2300         struct          ix_queue *que = adapter->queues;
2301 #ifndef IXGBE_LEGACY_TX
2302         struct tx_ring          *txr = adapter->tx_rings;
2303 #endif
2304         int             error, rid = 0;
2305
2306         /* MSI RID at 1 */
2307         if (adapter->msix == 1)
2308                 rid = 1;
2309
2310         /* We allocate a single interrupt resource */
2311         adapter->res = bus_alloc_resource_any(dev,
2312             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2313         if (adapter->res == NULL) {
2314                 device_printf(dev, "Unable to allocate bus resource: "
2315                     "interrupt\n");
2316                 return (ENXIO);
2317         }
2318
2319         /*
2320          * Try allocating a fast interrupt and the associated deferred
2321          * processing contexts.
2322          */
2323 #ifndef IXGBE_LEGACY_TX
2324         TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2325 #endif
2326         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2327         que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2328             taskqueue_thread_enqueue, &que->tq);
2329         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
2330             device_get_nameunit(adapter->dev));
2331
2332         /* Tasklets for Link, SFP and Multispeed Fiber */
2333         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2334         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2335         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2336 #ifdef IXGBE_FDIR
2337         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2338 #endif
2339         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2340             taskqueue_thread_enqueue, &adapter->tq);
2341         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2342             device_get_nameunit(adapter->dev));
2343
2344         if ((error = bus_setup_intr(dev, adapter->res,
2345             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
2346             que, &adapter->tag)) != 0) {
2347                 device_printf(dev, "Failed to register fast interrupt "
2348                     "handler: %d\n", error);
2349                 taskqueue_free(que->tq);
2350                 taskqueue_free(adapter->tq);
2351                 que->tq = NULL;
2352                 adapter->tq = NULL;
2353                 return (error);
2354         }
2355         /* For simplicity in the handlers */
2356         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2357
2358         return (0);
2359 }
2360
2361
2362 /*********************************************************************
2363  *
2364  *  Setup MSIX Interrupt resources and handlers 
2365  *
2366  **********************************************************************/
2367 static int
2368 ixgbe_allocate_msix(struct adapter *adapter)
2369 {
2370         device_t        dev = adapter->dev;
2371         struct          ix_queue *que = adapter->queues;
2372         struct          tx_ring *txr = adapter->tx_rings;
2373         int             error, rid, vector = 0;
2374         int             cpu_id = 0;
2375
2376 #ifdef  RSS
2377         /*
2378          * If we're doing RSS, the number of queues needs to
2379          * match the number of RSS buckets that are configured.
2380          *
2381          * + If there's more queues than RSS buckets, we'll end
2382          *   up with queues that get no traffic.
2383          *
2384          * + If there's more RSS buckets than queues, we'll end
2385          *   up having multiple RSS buckets map to the same queue,
2386          *   so there'll be some contention.
2387          */
2388         if (adapter->num_queues != rss_getnumbuckets()) {
2389                 device_printf(dev,
2390                     "%s: number of queues (%d) != number of RSS buckets (%d)"
2391                     "; performance will be impacted.\n",
2392                     __func__,
2393                     adapter->num_queues,
2394                     rss_getnumbuckets());
2395         }
2396 #endif
2397
2398
2399
2400         for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
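                /* MSI-X messages appear as SYS_RES_IRQ rids starting at 1 */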
2401                 rid = vector + 1;
2402                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2403                     RF_SHAREABLE | RF_ACTIVE);
2404                 if (que->res == NULL) {
2405                         device_printf(dev,"Unable to allocate"
2406                             " bus resource: que interrupt [%d]\n", vector);
2407                         return (ENXIO);
2408                 }
2409                 /* Set the handler function */
2410                 error = bus_setup_intr(dev, que->res,
2411                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2412                     ixgbe_msix_que, que, &que->tag);
2413                 if (error) {
2414                         que->res = NULL;
2415                         device_printf(dev, "Failed to register QUE handler");
2416                         return (error);
2417                 }
2418 #if __FreeBSD_version >= 800504
2419                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2420 #endif
2421                 que->msix = vector;
2422                 adapter->que_mask |= (u64)(1 << que->msix);
2423 #ifdef  RSS
2424                 /*
2425                  * The queue ID is used as the RSS layer bucket ID.
2426                  * We look up the queue ID -> RSS CPU ID and select
2427                  * that.
2428                  */
2429                 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2430 #else
2431                 /*
2432                  * Bind the msix vector, and thus the
2433                  * rings to the corresponding cpu.
2434                  *
2435                  * This just happens to match the default RSS round-robin
2436                  * bucket -> queue -> CPU allocation.
2437                  */
2438                 if (adapter->num_queues > 1)
2439                         cpu_id = i;
2440 #endif
2441                 if (adapter->num_queues > 1)
2442                         bus_bind_intr(dev, que->res, cpu_id);
2443
2444 #ifdef  RSS
2445                 device_printf(dev,
2446                     "Bound RSS bucket %d to CPU %d\n",
2447                     i, cpu_id);
2448 #else
2449                 device_printf(dev,
2450                     "Bound queue %d to cpu %d\n",
2451                     i, cpu_id);
2452 #endif
2453
2454
2455 #ifndef IXGBE_LEGACY_TX
2456                 TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2457 #endif
2458                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2459                 que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2460                     taskqueue_thread_enqueue, &que->tq);
2461 #ifdef  RSS
2462                 taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
2463                     cpu_id,
2464                     "%s (bucket %d)",
2465                     device_get_nameunit(adapter->dev),
2466                     cpu_id);
2467 #else
2468                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2469                     device_get_nameunit(adapter->dev));
2470 #endif
2471         }
2472
2473         /* and Link */
2474         rid = vector + 1;
2475         adapter->res = bus_alloc_resource_any(dev,
2476             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2477         if (!adapter->res) {
2478                 device_printf(dev,"Unable to allocate"
2479             " bus resource: Link interrupt [%d]\n", rid);
2480                 return (ENXIO);
2481         }
2482         /* Set the link handler function */
2483         error = bus_setup_intr(dev, adapter->res,
2484             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2485             ixgbe_msix_link, adapter, &adapter->tag);
2486         if (error) {
2487                 adapter->res = NULL;
2488                 device_printf(dev, "Failed to register LINK handler");
2489                 return (error);
2490         }
2491 #if __FreeBSD_version >= 800504
2492         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2493 #endif
2494         adapter->linkvec = vector;
2495         /* Tasklets for Link, SFP and Multispeed Fiber */
2496         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2497         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2498         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2499 #ifdef IXGBE_FDIR
2500         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2501 #endif
2502         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2503             taskqueue_thread_enqueue, &adapter->tq);
2504         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2505             device_get_nameunit(adapter->dev));
2506
2507         return (0);
2508 }
2509
2510 /*
2511  * Setup Either MSI/X or MSI
2512  */
2513 static int
2514 ixgbe_setup_msix(struct adapter *adapter)
2515 {
2516         device_t dev = adapter->dev;
2517         int rid, want, queues, msgs;
2518
2519         /* Override by tuneable */
2520         if (ixgbe_enable_msix == 0)
2521                 goto msi;
2522
2523         /* First try MSI/X */
2524         msgs = pci_msix_count(dev); 
2525         if (msgs == 0)
2526                 goto msi;
2527         rid = PCIR_BAR(MSIX_82598_BAR);
2528         adapter->msix_mem = bus_alloc_resource_any(dev,
2529             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2530         if (adapter->msix_mem == NULL) {
2531                 rid += 4;       /* 82599 maps in higher BAR */
2532                 adapter->msix_mem = bus_alloc_resource_any(dev,
2533                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2534         }
2535         if (adapter->msix_mem == NULL) {
2536                 /* May not be enabled */
2537                 device_printf(adapter->dev,
2538                     "Unable to map MSIX table \n");
2539                 goto msi;
2540         }
2541
2542         /* Figure out a reasonable auto config value */
2543         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2544 #ifdef  RSS
2545         /* If we're doing RSS, clamp at the number of RSS buckets */
2546         if (queues > rss_getnumbuckets())
2547                 queues = rss_getnumbuckets();
2548 #endif
2549
2550         if (ixgbe_num_queues != 0)
2551                 queues = ixgbe_num_queues;
2552         /* Set max queues to 8 when autoconfiguring */
2553         else if ((ixgbe_num_queues == 0) && (queues > 8))
2554                 queues = 8;
2555
2556         /* reflect correct sysctl value */
2557         ixgbe_num_queues = queues;
2558
2559         /*
2560         ** Want one vector (RX/TX pair) per queue
2561         ** plus an additional for Link.
2562         */
2563         want = queues + 1;
2564         if (msgs >= want)
2565                 msgs = want;
2566         else {
2567                 device_printf(adapter->dev,
2568                     "MSIX Configuration Problem, "
2569                     "%d vectors but %d queues wanted!\n",
2570                     msgs, want);
2571                 goto msi;
2572         }
2573         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2574                 device_printf(adapter->dev,
2575                     "Using MSIX interrupts with %d vectors\n", msgs);
2576                 adapter->num_queues = queues;
2577                 return (msgs);
2578         }
2579         /*
2580         ** If MSIX alloc failed or provided us with
2581         ** less than needed, free and fall through to MSI
2582         */
2583         pci_release_msi(dev);
2584
2585 msi:
2586         if (adapter->msix_mem != NULL) {
2587                 bus_release_resource(dev, SYS_RES_MEMORY,
2588                     rid, adapter->msix_mem);
2589                 adapter->msix_mem = NULL;
2590         }
2591         msgs = 1;
2592         if (pci_alloc_msi(dev, &msgs) == 0) {
2593                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2594                 return (msgs);
2595         }
2596         device_printf(adapter->dev,"Using a Legacy interrupt\n");
2597         return (0);
2598 }
2599
2600
2601 static int
2602 ixgbe_allocate_pci_resources(struct adapter *adapter)
2603 {
2604         int             rid;
2605         device_t        dev = adapter->dev;
2606
2607         rid = PCIR_BAR(0);
2608         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2609             &rid, RF_ACTIVE);
2610
2611         if (!(adapter->pci_mem)) {
2612                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2613                 return (ENXIO);
2614         }
2615
2616         adapter->osdep.mem_bus_space_tag =
2617                 rman_get_bustag(adapter->pci_mem);
2618         adapter->osdep.mem_bus_space_handle =
2619                 rman_get_bushandle(adapter->pci_mem);
2620         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2621
2622         /* Legacy defaults */
2623         adapter->num_queues = 1;
2624         adapter->hw.back = &adapter->osdep;
2625
2626         /*
2627         ** Now setup MSI or MSI/X, should
2628         ** return us the number of supported
2629         ** vectors. (Will be 1 for MSI)
2630         */
2631         adapter->msix = ixgbe_setup_msix(adapter);
2632         return (0);
2633 }
2634
2635 static void
2636 ixgbe_free_pci_resources(struct adapter * adapter)
2637 {
2638         struct          ix_queue *que = adapter->queues;
2639         device_t        dev = adapter->dev;
2640         int             rid, memrid;
2641
2642         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2643                 memrid = PCIR_BAR(MSIX_82598_BAR);
2644         else
2645                 memrid = PCIR_BAR(MSIX_82599_BAR);
2646
2647         /*
2648         ** There is a slight possibility of a failure mode
2649         ** in attach that will result in entering this function
2650         ** before interrupt resources have been initialized, and
2651         ** in that case we do not want to execute the loops below.
2652         ** We can detect this reliably by the state of the adapter
2653         ** res pointer.
2654         */
2655         if (adapter->res == NULL)
2656                 goto mem;
2657
2658         /*
2659         **  Release all msix queue resources:
2660         */
2661         for (int i = 0; i < adapter->num_queues; i++, que++) {
2662                 rid = que->msix + 1;
2663                 if (que->tag != NULL) {
2664                         bus_teardown_intr(dev, que->res, que->tag);
2665                         que->tag = NULL;
2666                 }
2667                 if (que->res != NULL)
2668                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2669         }
2670
2671
2672         /* Clean the Legacy or Link interrupt last */
2673         if (adapter->linkvec) /* we are doing MSIX */
2674                 rid = adapter->linkvec + 1;
2675         else
2676                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2677
2678         if (adapter->tag != NULL) {
2679                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2680                 adapter->tag = NULL;
2681         }
2682         if (adapter->res != NULL)
2683                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2684
2685 mem:
2686         if (adapter->msix)
2687                 pci_release_msi(dev);
2688
2689         if (adapter->msix_mem != NULL)
2690                 bus_release_resource(dev, SYS_RES_MEMORY,
2691                     memrid, adapter->msix_mem);
2692
2693         if (adapter->pci_mem != NULL)
2694                 bus_release_resource(dev, SYS_RES_MEMORY,
2695                     PCIR_BAR(0), adapter->pci_mem);
2696
2697         return;
2698 }
2699
2700 /*********************************************************************
2701  *
2702  *  Setup networking device structure and register an interface.
2703  *
2704  **********************************************************************/
2705 static int
2706 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2707 {
2708         struct ixgbe_hw *hw = &adapter->hw;
2709         struct ifnet   *ifp;
2710
2711         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2712
2713         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2714         if (ifp == NULL) {
2715                 device_printf(dev, "can not allocate ifnet structure\n");
2716                 return (-1);
2717         }
2718         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2719         ifp->if_baudrate = IF_Gbps(10);
2720         ifp->if_init = ixgbe_init;
2721         ifp->if_softc = adapter;
2722         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2723         ifp->if_ioctl = ixgbe_ioctl;
2724 #ifndef IXGBE_LEGACY_TX
2725         ifp->if_transmit = ixgbe_mq_start;
2726         ifp->if_qflush = ixgbe_qflush;
2727 #else
2728         ifp->if_start = ixgbe_start;
2729         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2730         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2731         IFQ_SET_READY(&ifp->if_snd);
2732 #endif
2733
2734         ether_ifattach(ifp, adapter->hw.mac.addr);
2735
2736         adapter->max_frame_size =
2737             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2738
2739         /*
2740          * Tell the upper layer(s) we support long frames.
2741          */
2742         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2743
2744         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2745         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2746         ifp->if_capabilities |= IFCAP_LRO;
2747         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2748                              |  IFCAP_VLAN_HWTSO
2749                              |  IFCAP_VLAN_MTU
2750                              |  IFCAP_HWSTATS;
2751         ifp->if_capenable = ifp->if_capabilities;
2752
2753         /*
2754         ** Don't turn this on by default: if vlans are
2755         ** created on another pseudo device (e.g. lagg)
2756         ** then vlan events are not passed thru, breaking
2757         ** operation, but with HW FILTER off it works. If
2758         ** using vlans directly on the ixgbe driver you can
2759         ** enable this and get full hardware tag filtering.
2760         */
2761         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2762
2763         /*
2764          * Specify the media types supported by this adapter and register
2765          * callbacks to update media and link information
2766          */
2767         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2768                      ixgbe_media_status);
2769         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2770         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2771         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2772                 ifmedia_add(&adapter->media,
2773                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2774                 ifmedia_add(&adapter->media,
2775                     IFM_ETHER | IFM_1000_T, 0, NULL);
2776         }
2777         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2778         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2779
2780         return (0);
2781 }
2782
2783 static void
2784 ixgbe_config_link(struct adapter *adapter)
2785 {
2786         struct ixgbe_hw *hw = &adapter->hw;
2787         u32     autoneg, err = 0;
2788         bool    sfp, negotiate;
2789
2790         sfp = ixgbe_is_sfp(hw);
2791
2792         if (sfp) { 
2793                 if (hw->phy.multispeed_fiber) {
2794                         hw->mac.ops.setup_sfp(hw);
2795                         ixgbe_enable_tx_laser(hw);
2796                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2797                 } else
2798                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2799         } else {
2800                 if (hw->mac.ops.check_link)
2801                         err = ixgbe_check_link(hw, &adapter->link_speed,
2802                             &adapter->link_up, FALSE);
2803                 if (err)
2804                         goto out;
2805                 autoneg = hw->phy.autoneg_advertised;
2806                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2807                         err  = hw->mac.ops.get_link_capabilities(hw,
2808                             &autoneg, &negotiate);
2809                 if (err)
2810                         goto out;
2811                 if (hw->mac.ops.setup_link)
2812                         err = hw->mac.ops.setup_link(hw,
2813                             autoneg, adapter->link_up);
2814         }
2815 out:
2816         return;
2817 }
2818
2819 /********************************************************************
2820  * Manage DMA'able memory.
2821  *******************************************************************/
2822 static void
2823 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2824 {
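        /* The DMA tag is created with a single segment; record its bus address */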
2825         if (error)
2826                 return;
2827         *(bus_addr_t *) arg = segs->ds_addr;
2828         return;
2829 }
2830
2831 static int
2832 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2833                 struct ixgbe_dma_alloc *dma, int mapflags)
2834 {
2835         device_t dev = adapter->dev;
2836         int             r;
2837
2838         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2839                                DBA_ALIGN, 0,    /* alignment, bounds */
2840                                BUS_SPACE_MAXADDR,       /* lowaddr */
2841                                BUS_SPACE_MAXADDR,       /* highaddr */
2842                                NULL, NULL,      /* filter, filterarg */
2843                                size,    /* maxsize */
2844                                1,       /* nsegments */
2845                                size,    /* maxsegsize */
2846                                BUS_DMA_ALLOCNOW,        /* flags */
2847                                NULL,    /* lockfunc */
2848                                NULL,    /* lockfuncarg */
2849                                &dma->dma_tag);
2850         if (r != 0) {
2851                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2852                        "error %u\n", r);
2853                 goto fail_0;
2854         }
2855         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2856                              BUS_DMA_NOWAIT, &dma->dma_map);
2857         if (r != 0) {
2858                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2859                        "error %u\n", r);
2860                 goto fail_1;
2861         }
2862         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2863                             size,
2864                             ixgbe_dmamap_cb,
2865                             &dma->dma_paddr,
2866                             mapflags | BUS_DMA_NOWAIT);
2867         if (r != 0) {
2868                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2869                        "error %u\n", r);
2870                 goto fail_2;
2871         }
2872         dma->dma_size = size;
2873         return (0);
2874 fail_2:
2875         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2876 fail_1:
2877         bus_dma_tag_destroy(dma->dma_tag);
2878 fail_0:
2879         dma->dma_tag = NULL;
2880         return (r);
2881 }
2882
2883 static void
2884 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2885 {
2886         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2887             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2888         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2889         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2890         bus_dma_tag_destroy(dma->dma_tag);
2891 }
2892
2893
2894 /*********************************************************************
2895  *
2896  *  Allocate memory for the transmit and receive rings, and then
2897  *  the descriptors associated with each, called only once at attach.
2898  *
2899  **********************************************************************/
2900 static int
2901 ixgbe_allocate_queues(struct adapter *adapter)
2902 {
2903         device_t        dev = adapter->dev;
2904         struct ix_queue *que;
2905         struct tx_ring  *txr;
2906         struct rx_ring  *rxr;
2907         int rsize, tsize, error = IXGBE_SUCCESS;
2908         int txconf = 0, rxconf = 0;
2909
2910         /* First allocate the top level queue structs */
2911         if (!(adapter->queues =
2912             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2913             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2914                 device_printf(dev, "Unable to allocate queue memory\n");
2915                 error = ENOMEM;
2916                 goto fail;
2917         }
2918
2919         /* First allocate the TX ring struct memory */
2920         if (!(adapter->tx_rings =
2921             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2922             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2923                 device_printf(dev, "Unable to allocate TX ring memory\n");
2924                 error = ENOMEM;
2925                 goto tx_fail;
2926         }
2927
2928         /* Next allocate the RX */
2929         if (!(adapter->rx_rings =
2930             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2931             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2932                 device_printf(dev, "Unable to allocate RX ring memory\n");
2933                 error = ENOMEM;
2934                 goto rx_fail;
2935         }
2936
2937         /* For the ring itself */
2938         tsize = roundup2(adapter->num_tx_desc *
2939             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
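        /*
        ** The size is rounded to a multiple of DBA_ALIGN here, and
        ** ixgbe_dma_malloc() requests DBA_ALIGN base alignment from
        ** busdma, which together satisfy the hardware's alignment
        ** requirement for descriptor rings.
        */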
2940
2941         /*
2942          * Now set up the TX queues; txconf is needed to handle the
2943          * possibility that things fail midcourse and we need to
2944          * undo the memory allocations gracefully.
2945          */
2946         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2947                 /* Set up some basics */
2948                 txr = &adapter->tx_rings[i];
2949                 txr->adapter = adapter;
2950                 txr->me = i;
2951                 txr->num_desc = adapter->num_tx_desc;
2952
2953                 /* Initialize the TX side lock */
2954                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2955                     device_get_nameunit(dev), txr->me);
2956                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2957
2958                 if (ixgbe_dma_malloc(adapter, tsize,
2959                         &txr->txdma, BUS_DMA_NOWAIT)) {
2960                         device_printf(dev,
2961                             "Unable to allocate TX Descriptor memory\n");
2962                         error = ENOMEM;
2963                         goto err_tx_desc;
2964                 }
2965                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2966                 bzero((void *)txr->tx_base, tsize);
2967
2968                 /* Now allocate transmit buffers for the ring */
2969                 if (ixgbe_allocate_transmit_buffers(txr)) {
2970                         device_printf(dev,
2971                             "Critical Failure setting up transmit buffers\n");
2972                         error = ENOMEM;
2973                         goto err_tx_desc;
2974                 }
2975 #ifndef IXGBE_LEGACY_TX
2976                 /* Allocate a buf ring */
2977                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2978                     M_WAITOK, &txr->tx_mtx);
2979                 if (txr->br == NULL) {
2980                         device_printf(dev,
2981                             "Critical Failure setting up buf ring\n");
2982                         error = ENOMEM;
2983                         goto err_tx_desc;
2984                 }
2985 #endif
2986         }
2987
2988         /*
2989          * Next the RX queues...
2990          */ 
2991         rsize = roundup2(adapter->num_rx_desc *
2992             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2993         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2994                 rxr = &adapter->rx_rings[i];
2995                 /* Set up some basics */
2996                 rxr->adapter = adapter;
2997                 rxr->me = i;
2998                 rxr->num_desc = adapter->num_rx_desc;
2999
3000                 /* Initialize the RX side lock */
3001                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3002                     device_get_nameunit(dev), rxr->me);
3003                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3004
3005                 if (ixgbe_dma_malloc(adapter, rsize,
3006                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3007                         device_printf(dev,
3008                             "Unable to allocate RX Descriptor memory\n");
3009                         error = ENOMEM;
3010                         goto err_rx_desc;
3011                 }
3012                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3013                 bzero((void *)rxr->rx_base, rsize);
3014
3015                 /* Allocate receive buffers for the ring*/
3016                 if (ixgbe_allocate_receive_buffers(rxr)) {
3017                         device_printf(dev,
3018                             "Critical Failure setting up receive buffers\n");
3019                         error = ENOMEM;
3020                         goto err_rx_desc;
3021                 }
3022         }
3023
3024         /*
3025         ** Finally set up the queue holding structs
3026         */
3027         for (int i = 0; i < adapter->num_queues; i++) {
3028                 que = &adapter->queues[i];
3029                 que->adapter = adapter;
3030                 que->txr = &adapter->tx_rings[i];
3031                 que->rxr = &adapter->rx_rings[i];
3032         }
3033
3034         return (0);
3035
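        /*
        ** Error unwinding: each label below falls through to the next,
        ** releasing resources in the reverse order of allocation; txconf
        ** and rxconf count how many rings were fully set up, so only
        ** those have their descriptor DMA areas freed here.
        */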
3036 err_rx_desc:
3037         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3038                 ixgbe_dma_free(adapter, &rxr->rxdma);
3039 err_tx_desc:
3040         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3041                 ixgbe_dma_free(adapter, &txr->txdma);
3042         free(adapter->rx_rings, M_DEVBUF);
3043 rx_fail:
3044         free(adapter->tx_rings, M_DEVBUF);
3045 tx_fail:
3046         free(adapter->queues, M_DEVBUF);
3047 fail:
3048         return (error);
3049 }
3050
3051 /*********************************************************************
3052  *
3053  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3054  *  the information needed to transmit a packet on the wire. This is
3055  *  called only once at attach, setup is done every reset.
3056  *
3057  **********************************************************************/
3058 static int
3059 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
3060 {
3061         struct adapter *adapter = txr->adapter;
3062         device_t dev = adapter->dev;
3063         struct ixgbe_tx_buf *txbuf;
3064         int error, i;
3065
3066         /*
3067          * Setup DMA descriptor areas.
3068          */
3069         if ((error = bus_dma_tag_create(
3070                                bus_get_dma_tag(adapter->dev),   /* parent */
3071                                1, 0,            /* alignment, bounds */
3072                                BUS_SPACE_MAXADDR,       /* lowaddr */
3073                                BUS_SPACE_MAXADDR,       /* highaddr */
3074                                NULL, NULL,              /* filter, filterarg */
3075                                IXGBE_TSO_SIZE,          /* maxsize */
3076                                adapter->num_segs,       /* nsegments */
3077                                PAGE_SIZE,               /* maxsegsize */
3078                                0,                       /* flags */
3079                                NULL,                    /* lockfunc */
3080                                NULL,                    /* lockfuncarg */
3081                                &txr->txtag))) {
3082                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3083                 goto fail;
3084         }
3085
3086         if (!(txr->tx_buffers =
3087             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3088             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3089                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3090                 error = ENOMEM;
3091                 goto fail;
3092         }
3093
3094         /* Create the descriptor buffer dma maps */
3095         txbuf = txr->tx_buffers;
3096         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3097                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3098                 if (error != 0) {
3099                         device_printf(dev, "Unable to create TX DMA map\n");
3100                         goto fail;
3101                 }
3102         }
3103
3104         return 0;
3105 fail:
3106         /* Free everything; the free routine handles a partial allocation */
3107         ixgbe_free_transmit_structures(adapter);
3108         return (error);
3109 }
3110
3111 /*********************************************************************
3112  *
3113  *  Initialize a transmit ring.
3114  *
3115  **********************************************************************/
3116 static void
3117 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3118 {
3119         struct adapter *adapter = txr->adapter;
3120         struct ixgbe_tx_buf *txbuf;
3121         int i;
3122 #ifdef DEV_NETMAP
3123         struct netmap_adapter *na = NA(adapter->ifp);
3124         struct netmap_slot *slot;
3125 #endif /* DEV_NETMAP */
3126
3127         /* Clear the old ring contents */
3128         IXGBE_TX_LOCK(txr);
3129 #ifdef DEV_NETMAP
3130         /*
3131          * (under lock): if in netmap mode, do some consistency
3132          * checks and set slot to entry 0 of the netmap ring.
3133          */
3134         slot = netmap_reset(na, NR_TX, txr->me, 0);
3135 #endif /* DEV_NETMAP */
3136         bzero((void *)txr->tx_base,
3137               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3138         /* Reset indices */
3139         txr->next_avail_desc = 0;
3140         txr->next_to_clean = 0;
3141
3142         /* Free any existing tx buffers. */
3143         txbuf = txr->tx_buffers;
3144         for (i = 0; i < txr->num_desc; i++, txbuf++) {
3145                 if (txbuf->m_head != NULL) {
3146                         bus_dmamap_sync(txr->txtag, txbuf->map,
3147                             BUS_DMASYNC_POSTWRITE);
3148                         bus_dmamap_unload(txr->txtag, txbuf->map);
3149                         m_freem(txbuf->m_head);
3150                         txbuf->m_head = NULL;
3151                 }
3152 #ifdef DEV_NETMAP
3153                 /*
3154                  * In netmap mode, set the map for the packet buffer.
3155                  * NOTE: Some drivers (not this one) also need to set
3156                  * the physical buffer address in the NIC ring.
3157                  * Slots in the netmap ring (indexed by "si") are
3158                  * kring->nkr_hwofs positions "ahead" wrt the
3159                  * corresponding slot in the NIC ring. In some drivers
3160                  * (not here) nkr_hwofs can be negative. Function
3161                  * netmap_idx_n2k() handles wraparounds properly.
3162                  */
3163                 if (slot) {
3164                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3165                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3166                 }
3167 #endif /* DEV_NETMAP */
3168                 /* Clear the EOP descriptor pointer */
3169                 txbuf->eop = NULL;
3170         }
3171
3172 #ifdef IXGBE_FDIR
3173         /* Set the rate at which we sample packets */
3174         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3175                 txr->atr_sample = atr_sample_rate;
3176 #endif
3177
3178         /* Set number of descriptors available */
3179         txr->tx_avail = adapter->num_tx_desc;
3180
3181         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3182             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3183         IXGBE_TX_UNLOCK(txr);
3184 }
3185
3186 /*********************************************************************
3187  *
3188  *  Initialize all transmit rings.
3189  *
3190  **********************************************************************/
3191 static int
3192 ixgbe_setup_transmit_structures(struct adapter *adapter)
3193 {
3194         struct tx_ring *txr = adapter->tx_rings;
3195
3196         for (int i = 0; i < adapter->num_queues; i++, txr++)
3197                 ixgbe_setup_transmit_ring(txr);
3198
3199         return (0);
3200 }
3201
3202 /*********************************************************************
3203  *
3204  *  Enable transmit unit.
3205  *
3206  **********************************************************************/
3207 static void
3208 ixgbe_initialize_transmit_units(struct adapter *adapter)
3209 {
3210         struct tx_ring  *txr = adapter->tx_rings;
3211         struct ixgbe_hw *hw = &adapter->hw;
3212
3213         /* Setup the Base and Length of the Tx Descriptor Ring */
3214
3215         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3216                 u64     tdba = txr->txdma.dma_paddr;
3217                 u32     txctrl;
3218
3219                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3220                        (tdba & 0x00000000ffffffffULL));
3221                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3222                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3223                     adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3224
3225                 /* Setup the HW Tx Head and Tail descriptor pointers */
3226                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3227                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3228
3229                 /* Setup Transmit Descriptor Cmd Settings */
3230                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3231                 txr->queue_status = IXGBE_QUEUE_IDLE;
3232
3233                 /* Set the processing limit */
3234                 txr->process_limit = ixgbe_tx_process_limit;
3235
3236                 /* Disable Head Writeback */
3237                 switch (hw->mac.type) {
3238                 case ixgbe_mac_82598EB:
3239                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3240                         break;
3241                 case ixgbe_mac_82599EB:
3242                 case ixgbe_mac_X540:
3243                 default:
3244                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3245                         break;
3246                 }
3247                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3248                 switch (hw->mac.type) {
3249                 case ixgbe_mac_82598EB:
3250                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3251                         break;
3252                 case ixgbe_mac_82599EB:
3253                 case ixgbe_mac_X540:
3254                 default:
3255                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3256                         break;
3257                 }
3258
3259         }
3260
3261         if (hw->mac.type != ixgbe_mac_82598EB) {
3262                 u32 dmatxctl, rttdcs;
3263                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3264                 dmatxctl |= IXGBE_DMATXCTL_TE;
3265                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3266                 /* Disable arbiter to set MTQC */
3267                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3268                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3269                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3270                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3271                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3272                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3273         }
3274
3275         return;
3276 }
3277
3278 /*********************************************************************
3279  *
3280  *  Free all transmit rings.
3281  *
3282  **********************************************************************/
3283 static void
3284 ixgbe_free_transmit_structures(struct adapter *adapter)
3285 {
3286         struct tx_ring *txr = adapter->tx_rings;
3287
3288         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3289                 IXGBE_TX_LOCK(txr);
3290                 ixgbe_free_transmit_buffers(txr);
3291                 ixgbe_dma_free(adapter, &txr->txdma);
3292                 IXGBE_TX_UNLOCK(txr);
3293                 IXGBE_TX_LOCK_DESTROY(txr);
3294         }
3295         free(adapter->tx_rings, M_DEVBUF);
3296 }
3297
3298 /*********************************************************************
3299  *
3300  *  Free transmit ring related data structures.
3301  *
3302  **********************************************************************/
3303 static void
3304 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3305 {
3306         struct adapter *adapter = txr->adapter;
3307         struct ixgbe_tx_buf *tx_buffer;
3308         int             i;
3309
3310         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
3311
3312         if (txr->tx_buffers == NULL)
3313                 return;
3314
3315         tx_buffer = txr->tx_buffers;
3316         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3317                 if (tx_buffer->m_head != NULL) {
3318                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3319                             BUS_DMASYNC_POSTWRITE);
3320                         bus_dmamap_unload(txr->txtag,
3321                             tx_buffer->map);
3322                         m_freem(tx_buffer->m_head);
3323                         tx_buffer->m_head = NULL;
3324                         if (tx_buffer->map != NULL) {
3325                                 bus_dmamap_destroy(txr->txtag,
3326                                     tx_buffer->map);
3327                                 tx_buffer->map = NULL;
3328                         }
3329                 } else if (tx_buffer->map != NULL) {
3330                         bus_dmamap_unload(txr->txtag,
3331                             tx_buffer->map);
3332                         bus_dmamap_destroy(txr->txtag,
3333                             tx_buffer->map);
3334                         tx_buffer->map = NULL;
3335                 }
3336         }
3337 #ifndef IXGBE_LEGACY_TX
3338         if (txr->br != NULL)
3339                 buf_ring_free(txr->br, M_DEVBUF);
3340 #endif
3341         if (txr->tx_buffers != NULL) {
3342                 free(txr->tx_buffers, M_DEVBUF);
3343                 txr->tx_buffers = NULL;
3344         }
3345         if (txr->txtag != NULL) {
3346                 bus_dma_tag_destroy(txr->txtag);
3347                 txr->txtag = NULL;
3348         }
3349         return;
3350 }
3351
3352 /*********************************************************************
3353  *
3354  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3355  *
3356  **********************************************************************/
3357
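/*
** Rough sketch of how the lengths are packed below (see the 82599
** datasheet for the authoritative field layout): the VLAN tag goes in
** bits 31:16 (IXGBE_ADVTXD_VLAN_SHIFT), the MAC header length in bits
** 15:9 (IXGBE_ADVTXD_MACLEN_SHIFT) and the IP header length in the low
** bits.  An untagged IPv4/TCP frame with a 14-byte Ethernet header and
** a 20-byte IP header therefore yields vlan_macip_lens = (14 << 9) | 20.
*/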
3358 static int
3359 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3360     u32 *cmd_type_len, u32 *olinfo_status)
3361 {
3362         struct ixgbe_adv_tx_context_desc *TXD;
3363         struct ether_vlan_header *eh;
3364         struct ip *ip;
3365         struct ip6_hdr *ip6;
3366         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3367         int     ehdrlen, ip_hlen = 0;
3368         u16     etype;
3369         u8      ipproto = 0;
3370         int     offload = TRUE;
3371         int     ctxd = txr->next_avail_desc;
3372         u16     vtag = 0;
3373
3374         /* First check if TSO is to be used */
3375         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3376                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3377
3378         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3379                 offload = FALSE;
3380
3381         /* Indicate the whole packet as payload when not doing TSO */
3382         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3383
3384         /* Now ready a context descriptor */
3385         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3386
3387         /*
3388         ** In advanced descriptors the vlan tag must 
3389         ** be placed into the context descriptor. Hence
3390         ** we need to make one even if not doing offloads.
3391         */
3392         if (mp->m_flags & M_VLANTAG) {
3393                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3394                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3395         } else if (offload == FALSE) /* ... no offload to do */
3396                 return (0);
3397
3398         /*
3399          * Determine where frame payload starts.
3400          * Jump over vlan headers if already present,
3401          * helpful for QinQ too.
3402          */
3403         eh = mtod(mp, struct ether_vlan_header *);
3404         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3405                 etype = ntohs(eh->evl_proto);
3406                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3407         } else {
3408                 etype = ntohs(eh->evl_encap_proto);
3409                 ehdrlen = ETHER_HDR_LEN;
3410         }
3411
3412         /* Set the ether header length */
3413         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3414
3415         switch (etype) {
3416                 case ETHERTYPE_IP:
3417                         ip = (struct ip *)(mp->m_data + ehdrlen);
3418                         ip_hlen = ip->ip_hl << 2;
3419                         ipproto = ip->ip_p;
3420                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3421                         break;
3422                 case ETHERTYPE_IPV6:
3423                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3424                         ip_hlen = sizeof(struct ip6_hdr);
3425                         /* XXX-BZ this will go badly in case of ext hdrs. */
3426                         ipproto = ip6->ip6_nxt;
3427                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3428                         break;
3429                 default:
3430                         offload = FALSE;
3431                         break;
3432         }
3433
3434         vlan_macip_lens |= ip_hlen;
3435         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3436
3437         switch (ipproto) {
3438                 case IPPROTO_TCP:
3439                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3440                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3441                         break;
3442
3443                 case IPPROTO_UDP:
3444                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3445                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3446                         break;
3447
3448 #if __FreeBSD_version >= 800000
3449                 case IPPROTO_SCTP:
3450                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3451                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3452                         break;
3453 #endif
3454                 default:
3455                         offload = FALSE;
3456                         break;
3457         }
3458
3459         if (offload) /* For the TX descriptor setup */
3460                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3461
3462         /* Now copy bits into descriptor */
3463         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3464         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3465         TXD->seqnum_seed = htole32(0);
3466         TXD->mss_l4len_idx = htole32(0);
3467
3468         /* We've consumed the first desc, adjust counters */
3469         if (++ctxd == txr->num_desc)
3470                 ctxd = 0;
3471         txr->next_avail_desc = ctxd;
3472         --txr->tx_avail;
3473
3474         return (0);
3475 }
3476
3477 /**********************************************************************
3478  *
3479  *  Setup work for hardware segmentation offload (TSO) on
3480  *  adapters using advanced tx descriptors
3481  *
3482  **********************************************************************/
3483 static int
3484 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3485     u32 *cmd_type_len, u32 *olinfo_status)
3486 {
3487         struct ixgbe_adv_tx_context_desc *TXD;
3488         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3489         u32 mss_l4len_idx = 0, paylen;
3490         u16 vtag = 0, eh_type;
3491         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3492         struct ether_vlan_header *eh;
3493 #ifdef INET6
3494         struct ip6_hdr *ip6;
3495 #endif
3496 #ifdef INET
3497         struct ip *ip;
3498 #endif
3499         struct tcphdr *th;
3500
3501
3502         /*
3503          * Determine where frame payload starts.
3504          * Jump over vlan headers if already present
3505          */
3506         eh = mtod(mp, struct ether_vlan_header *);
3507         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3508                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3509                 eh_type = eh->evl_proto;
3510         } else {
3511                 ehdrlen = ETHER_HDR_LEN;
3512                 eh_type = eh->evl_encap_proto;
3513         }
3514
3515         switch (ntohs(eh_type)) {
3516 #ifdef INET6
3517         case ETHERTYPE_IPV6:
3518                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3519                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3520                 if (ip6->ip6_nxt != IPPROTO_TCP)
3521                         return (ENXIO);
3522                 ip_hlen = sizeof(struct ip6_hdr);
3523                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3524                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3525                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3526                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3527                 break;
3528 #endif
3529 #ifdef INET
3530         case ETHERTYPE_IP:
3531                 ip = (struct ip *)(mp->m_data + ehdrlen);
3532                 if (ip->ip_p != IPPROTO_TCP)
3533                         return (ENXIO);
3534                 ip->ip_sum = 0;
3535                 ip_hlen = ip->ip_hl << 2;
3536                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3537                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3538                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3539                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3540                 /* Tell transmit desc to also do IPv4 checksum. */
3541                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3542                 break;
3543 #endif
3544         default:
3545                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3546                     __func__, ntohs(eh_type));
3547                 break;
3548         }
3549
3550         ctxd = txr->next_avail_desc;
3551         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3552
3553         tcp_hlen = th->th_off << 2;
3554
3555         /* This is used in the transmit desc in encap */
3556         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3557
3558         /* VLAN MACLEN IPLEN */
3559         if (mp->m_flags & M_VLANTAG) {
3560                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3561                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3562         }
3563
3564         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3565         vlan_macip_lens |= ip_hlen;
3566         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3567
3568         /* ADV DTYPE TUCMD */
3569         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3570         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3571         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3572
3573         /* MSS L4LEN IDX */
3574         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3575         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3576         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
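        /*
        ** Sketch of the packing above: the MSS sits in the high 16 bits
        ** (IXGBE_ADVTXD_MSS_SHIFT) and the TCP header length above the
        ** low byte (IXGBE_ADVTXD_L4LEN_SHIFT), so a 1448-byte segment
        ** size with a 20-byte TCP header gives (1448 << 16) | (20 << 8).
        */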
3577
3578         TXD->seqnum_seed = htole32(0);
3579
3580         if (++ctxd == txr->num_desc)
3581                 ctxd = 0;
3582
3583         txr->tx_avail--;
3584         txr->next_avail_desc = ctxd;
3585         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3586         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3587         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3588         ++txr->tso_tx;
3589         return (0);
3590 }
3591
3592 #ifdef IXGBE_FDIR
3593 /*
3594 ** This routine parses packet headers so that Flow
3595 ** Director can make a hashed filter table entry,
3596 ** allowing traffic flows to be identified and kept
3597 ** on the same CPU.  This would be a performance
3598 ** hit, but we only do it for 1 in every
3599 ** IXGBE_FDIR_RATE packets.
3600 */
3601 static void
3602 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3603 {
3604         struct adapter                  *adapter = txr->adapter;
3605         struct ix_queue                 *que;
3606         struct ip                       *ip;
3607         struct tcphdr                   *th;
3608         struct udphdr                   *uh;
3609         struct ether_vlan_header        *eh;
3610         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3611         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3612         int                             ehdrlen, ip_hlen;
3613         u16                             etype;
3614
3615         eh = mtod(mp, struct ether_vlan_header *);
3616         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3617                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3618                 etype = eh->evl_proto;
3619         } else {
3620                 ehdrlen = ETHER_HDR_LEN;
3621                 etype = eh->evl_encap_proto;
3622         }
3623
3624         /* Only handling IPv4 */
3625         if (etype != htons(ETHERTYPE_IP))
3626                 return;
3627
3628         ip = (struct ip *)(mp->m_data + ehdrlen);
3629         ip_hlen = ip->ip_hl << 2;
3630
3631         /* check if we're UDP or TCP */
3632         switch (ip->ip_p) {
3633         case IPPROTO_TCP:
3634                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3635                 /* src and dst are inverted */
3636                 common.port.dst ^= th->th_sport;
3637                 common.port.src ^= th->th_dport;
3638                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3639                 break;
3640         case IPPROTO_UDP:
3641                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3642                 /* src and dst are inverted */
3643                 common.port.dst ^= uh->uh_sport;
3644                 common.port.src ^= uh->uh_dport;
3645                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3646                 break;
3647         default:
3648                 return;
3649         }
3650
3651         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3652         if (mp->m_pkthdr.ether_vtag)
3653                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3654         else
3655                 common.flex_bytes ^= etype;
3656         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3657
3658         que = &adapter->queues[txr->me];
3659         /*
3660         ** This assumes the Rx queue and Tx
3661         ** queue are bound to the same CPU
3662         */
3663         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3664             input, common, que->msix);
3665 }
3666 #endif /* IXGBE_FDIR */
3667
3668 /**********************************************************************
3669  *
3670  *  Examine each tx_buffer in the used queue. If the hardware is done
3671  *  processing the packet then free associated resources. The
3672  *  tx_buffer is put back on the free queue.
3673  *
3674  **********************************************************************/
3675 static void
3676 ixgbe_txeof(struct tx_ring *txr)
3677 {
3678 #ifdef DEV_NETMAP
3679         struct adapter          *adapter = txr->adapter;
3680         struct ifnet            *ifp = adapter->ifp;
3681 #endif
3682         u32                     work, processed = 0;
3683         u16                     limit = txr->process_limit;
3684         struct ixgbe_tx_buf     *buf;
3685         union ixgbe_adv_tx_desc *txd;
3686
3687         mtx_assert(&txr->tx_mtx, MA_OWNED);
3688
3689 #ifdef DEV_NETMAP
3690         if (ifp->if_capenable & IFCAP_NETMAP) {
3691                 struct netmap_adapter *na = NA(ifp);
3692                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3693                 txd = txr->tx_base;
3694                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3695                     BUS_DMASYNC_POSTREAD);
3696                 /*
3697                  * In netmap mode, all the work is done in the context
3698                  * of the client thread. Interrupt handlers only wake up
3699                  * clients, which may be sleeping on individual rings
3700                  * or on a global resource for all rings.
3701                  * To implement tx interrupt mitigation, we wake up the client
3702                  * thread roughly every half ring, even if the NIC interrupts
3703                  * more frequently. This is implemented as follows:
3704                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3705                  *   the slot that should wake up the thread (nkr_num_slots
3706                  *   means the user thread should not be woken up);
3707                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3708                  *   or the slot has the DD bit set.
3709                  */
3710                 if (!netmap_mitigate ||
3711                     (kring->nr_kflags < kring->nkr_num_slots &&
3712                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3713                         netmap_tx_irq(ifp, txr->me);
3714                 }
3715                 return;
3716         }
3717 #endif /* DEV_NETMAP */
3718
3719         if (txr->tx_avail == txr->num_desc) {
3720                 txr->queue_status = IXGBE_QUEUE_IDLE;
3721                 return;
3722         }
3723
3724         /* Get work starting point */
3725         work = txr->next_to_clean;
3726         buf = &txr->tx_buffers[work];
3727         txd = &txr->tx_base[work];
3728         work -= txr->num_desc; /* The distance to ring end */
3729         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3730             BUS_DMASYNC_POSTREAD);
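        /*
        ** 'work' was biased by -num_desc above (the unsigned wrap-around
        ** is intentional), so it reaches zero exactly when the walk runs
        ** off the end of the ring and must wrap back to entry 0, which
        ** is what the "if (!work)" checks below do.
        */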
3731
3732         do {
3733                 union ixgbe_adv_tx_desc *eop = buf->eop;
3734                 if (eop == NULL) /* No work */
3735                         break;
3736
3737                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3738                         break;  /* I/O not complete */
3739
3740                 if (buf->m_head) {
3741                         txr->bytes +=
3742                             buf->m_head->m_pkthdr.len;
3743                         bus_dmamap_sync(txr->txtag,
3744                             buf->map,
3745                             BUS_DMASYNC_POSTWRITE);
3746                         bus_dmamap_unload(txr->txtag,
3747                             buf->map);
3748                         m_freem(buf->m_head);
3749                         buf->m_head = NULL;
3750                         buf->map = NULL;
3751                 }
3752                 buf->eop = NULL;
3753                 ++txr->tx_avail;
3754
3755                 /* We clean the range if multi segment */
3756                 while (txd != eop) {
3757                         ++txd;
3758                         ++buf;
3759                         ++work;
3760                         /* wrap the ring? */
3761                         if (__predict_false(!work)) {
3762                                 work -= txr->num_desc;
3763                                 buf = txr->tx_buffers;
3764                                 txd = txr->tx_base;
3765                         }
3766                         if (buf->m_head) {
3767                                 txr->bytes +=
3768                                     buf->m_head->m_pkthdr.len;
3769                                 bus_dmamap_sync(txr->txtag,
3770                                     buf->map,
3771                                     BUS_DMASYNC_POSTWRITE);
3772                                 bus_dmamap_unload(txr->txtag,
3773                                     buf->map);
3774                                 m_freem(buf->m_head);
3775                                 buf->m_head = NULL;
3776                                 buf->map = NULL;
3777                         }
3778                         ++txr->tx_avail;
3779                         buf->eop = NULL;
3780
3781                 }
3782                 ++txr->packets;
3783                 ++processed;
3784                 txr->watchdog_time = ticks;
3785
3786                 /* Try the next packet */
3787                 ++txd;
3788                 ++buf;
3789                 ++work;
3790                 /* reset with a wrap */
3791                 if (__predict_false(!work)) {
3792                         work -= txr->num_desc;
3793                         buf = txr->tx_buffers;
3794                         txd = txr->tx_base;
3795                 }
3796                 prefetch(txd);
3797         } while (__predict_true(--limit));
3798
3799         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3800             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3801
3802         work += txr->num_desc;
3803         txr->next_to_clean = work;
3804
3805         /*
3806         ** Watchdog calculation: we know there's work
3807         ** outstanding, or the first return above would
3808         ** have been taken, so nothing processed for too
3809         ** long indicates a hang.
3810         */
3811         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3812                 txr->queue_status = IXGBE_QUEUE_HUNG;
3813
3814         if (txr->tx_avail == txr->num_desc)
3815                 txr->queue_status = IXGBE_QUEUE_IDLE;
3816
3817         return;
3818 }
3819
3820 /*********************************************************************
3821  *
3822  *  Refresh mbuf buffers for RX descriptor rings
3823  *   - now keeps its own state, so discards due to resource
3824  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3825  *     it just returns, keeping its placeholder, and can simply
3826  *     be recalled to try again.
3827  *
3828  **********************************************************************/
3829 static void
3830 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3831 {
3832         struct adapter          *adapter = rxr->adapter;
3833         bus_dma_segment_t       seg[1];
3834         struct ixgbe_rx_buf     *rxbuf;
3835         struct mbuf             *mp;
3836         int                     i, j, nsegs, error;
3837         bool                    refreshed = FALSE;
3838
3839         i = j = rxr->next_to_refresh;
3840         /* Control the loop with one beyond */
3841         if (++j == rxr->num_desc)
3842                 j = 0;
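        /*
        ** 'i' is the slot being refreshed and 'j' runs one slot ahead;
        ** the loop stops when the look-ahead reaches 'limit', so the
        ** refresh never walks onto descriptors the hardware or the
        ** cleanup path may still be using.
        */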
3843
3844         while (j != limit) {
3845                 rxbuf = &rxr->rx_buffers[i];
3846                 if (rxbuf->buf == NULL) {
3847                         mp = m_getjcl(M_NOWAIT, MT_DATA,
3848                             M_PKTHDR, rxr->mbuf_sz);
3849                         if (mp == NULL)
3850                                 goto update;
3851                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3852                                 m_adj(mp, ETHER_ALIGN);
3853                 } else
3854                         mp = rxbuf->buf;
3855
3856                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
3857
3858                 /* If we're dealing with an mbuf that was copied rather
3859                  * than replaced, there's no need to go through busdma.
3860                  */
3861                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
3862                         /* Get the memory mapping */
3863                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3864                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
3865                         if (error != 0) {
3866                                 printf("Refresh mbufs: payload dmamap load"
3867                                     " failure - %d\n", error);
3868                                 m_free(mp);
3869                                 rxbuf->buf = NULL;
3870                                 goto update;
3871                         }
3872                         rxbuf->buf = mp;
3873                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3874                             BUS_DMASYNC_PREREAD);
3875                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
3876                             htole64(seg[0].ds_addr);
3877                 } else {
3878                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
3879                         rxbuf->flags &= ~IXGBE_RX_COPY;
3880                 }
3881
3882                 refreshed = TRUE;
3883                 /* Next is precalculated */
3884                 i = j;
3885                 rxr->next_to_refresh = i;
3886                 if (++j == rxr->num_desc)
3887                         j = 0;
3888         }
3889 update:
3890         if (refreshed) /* Update hardware tail index */
3891                 IXGBE_WRITE_REG(&adapter->hw,
3892                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3893         return;
3894 }
3895
3896 /*********************************************************************
3897  *
3898  *  Allocate memory for rx_buffer structures. Since we use one
3899  *  rx_buffer per received packet, the maximum number of rx_buffer's
3900  *  that we'll need is equal to the number of receive descriptors
3901  *  that we've allocated.
3902  *
3903  **********************************************************************/
3904 static int
3905 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3906 {
3907         struct  adapter         *adapter = rxr->adapter;
3908         device_t                dev = adapter->dev;
3909         struct ixgbe_rx_buf     *rxbuf;
3910         int                     i, bsize, error;
3911
3912         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
3913         if (!(rxr->rx_buffers =
3914             (struct ixgbe_rx_buf *) malloc(bsize,
3915             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3916                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3917                 error = ENOMEM;
3918                 goto fail;
3919         }
3920
3921         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
3922                                    1, 0,        /* alignment, bounds */
3923                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3924                                    BUS_SPACE_MAXADDR,   /* highaddr */
3925                                    NULL, NULL,          /* filter, filterarg */
3926                                    MJUM16BYTES,         /* maxsize */
3927                                    1,                   /* nsegments */
3928                                    MJUM16BYTES,         /* maxsegsize */
3929                                    0,                   /* flags */
3930                                    NULL,                /* lockfunc */
3931                                    NULL,                /* lockfuncarg */
3932                                    &rxr->ptag))) {
3933                 device_printf(dev, "Unable to create RX DMA tag\n");
3934                 goto fail;
3935         }
3936
3937         for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
3938                 rxbuf = &rxr->rx_buffers[i];
3939                 error = bus_dmamap_create(rxr->ptag,
3940                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3941                 if (error) {
3942                         device_printf(dev, "Unable to create RX dma map\n");
3943                         goto fail;
3944                 }
3945         }
3946
3947         return (0);
3948
3949 fail:
3950         /* Frees all, but can handle partial completion */
3951         ixgbe_free_receive_structures(adapter);
3952         return (error);
3953 }
3954
3955 /*
3956 ** Used to detect a descriptor that has
3957 ** been merged by Hardware RSC.
3958 */
3959 static inline u32
3960 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3961 {
3962         return (le32toh(rx->wb.lower.lo_dword.data) &
3963             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3964 }
3965
3966 /*********************************************************************
3967  *
3968  *  Initialize Hardware RSC (LRO) feature on 82599
3969  *  for an RX ring, this is toggled by the LRO capability
3970  *  even though it is transparent to the stack.
3971  *
3972  *  NOTE: since this HW feature only works with IPv4 and
3973  *        our testing has shown soft LRO to be as effective,
3974  *        I have decided to disable this by default.
3975  *
3976  **********************************************************************/
3977 static void
3978 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3979 {
3980         struct  adapter         *adapter = rxr->adapter;
3981         struct  ixgbe_hw        *hw = &adapter->hw;
3982         u32                     rscctrl, rdrxctl;
3983
3984         /* If turning LRO/RSC off we need to disable it */
3985         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
3986                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3987                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                /* Write the cleared enable bit back so RSC is really off */
                IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3988                 return;
3989         }
3990
3991         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3992         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3993 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3994         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3995 #endif /* DEV_NETMAP */
3996         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3997         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3998         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3999
4000         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4001         rscctrl |= IXGBE_RSCCTL_RSCEN;
4002         /*
4003         ** Limit the total number of descriptors that
4004         ** can be combined, so it does not exceed 64K
4005         */
4006         if (rxr->mbuf_sz == MCLBYTES)
4007                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
4008         else if (rxr->mbuf_sz == MJUMPAGESIZE)
4009                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
4010         else if (rxr->mbuf_sz == MJUM9BYTES)
4011                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
4012         else  /* Using 16K cluster */
4013                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
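        /*
        ** Rough arithmetic behind the caps above: 16 x 2KB (MCLBYTES)
        ** and 8 x 4KB (MJUMPAGESIZE on most platforms) are 32KB,
        ** 4 x 9KB is 36KB and a single 16KB cluster is 16KB, all
        ** comfortably below the 64KB limit on a coalesced receive.
        */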
4014
4015         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4016
4017         /* Enable TCP header recognition */
4018         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
4019             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
4020             IXGBE_PSRTYPE_TCPHDR));
4021
4022         /* Disable RSC for ACK packets */
4023         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
4024             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
4025
4026         rxr->hw_rsc = TRUE;
4027 }
4028
4029
4030 static void     
4031 ixgbe_free_receive_ring(struct rx_ring *rxr)
4032 {
4033         struct ixgbe_rx_buf       *rxbuf;
4034         int i;
4035
4036         for (i = 0; i < rxr->num_desc; i++) {
4037                 rxbuf = &rxr->rx_buffers[i];
4038                 if (rxbuf->buf != NULL) {
4039                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4040                             BUS_DMASYNC_POSTREAD);
4041                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4042                         rxbuf->buf->m_flags |= M_PKTHDR;
4043                         m_freem(rxbuf->buf);
4044                         rxbuf->buf = NULL;
4045                         rxbuf->flags = 0;
4046                 }
4047         }
4048 }
4049
4050
4051 /*********************************************************************
4052  *
4053  *  Initialize a receive ring and its buffers.
4054  *
4055  **********************************************************************/
4056 static int
4057 ixgbe_setup_receive_ring(struct rx_ring *rxr)
4058 {
4059         struct  adapter         *adapter;
4060         struct ifnet            *ifp;
4061         device_t                dev;
4062         struct ixgbe_rx_buf     *rxbuf;
4063         bus_dma_segment_t       seg[1];
4064         struct lro_ctrl         *lro = &rxr->lro;
4065         int                     rsize, nsegs, error = 0;
4066 #ifdef DEV_NETMAP
4067         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4068         struct netmap_slot *slot;
4069 #endif /* DEV_NETMAP */
4070
4071         adapter = rxr->adapter;
4072         ifp = adapter->ifp;
4073         dev = adapter->dev;
4074
4075         /* Clear the ring contents */
4076         IXGBE_RX_LOCK(rxr);
4077 #ifdef DEV_NETMAP
4078         /* same as in ixgbe_setup_transmit_ring() */
4079         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4080 #endif /* DEV_NETMAP */
4081         rsize = roundup2(adapter->num_rx_desc *
4082             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4083         bzero((void *)rxr->rx_base, rsize);
4084         /* Cache the size */
4085         rxr->mbuf_sz = adapter->rx_mbuf_sz;
4086
4087         /* Free current RX buffer structs and their mbufs */
4088         ixgbe_free_receive_ring(rxr);
4089
4090         /* Now replenish the mbufs */
4091         for (int j = 0; j != rxr->num_desc; ++j) {
4092                 struct mbuf     *mp;
4093
4094                 rxbuf = &rxr->rx_buffers[j];
4095 #ifdef DEV_NETMAP
4096                 /*
4097                  * In netmap mode, fill the map and set the buffer
4098                  * address in the NIC ring, considering the offset
4099                  * between the netmap and NIC rings (see comment in
4100                  * ixgbe_setup_transmit_ring() ). No need to allocate
4101                  * an mbuf, so end the block with a continue;
4102                  */
4103                 if (slot) {
4104                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4105                         uint64_t paddr;
4106                         void *addr;
4107
4108                         addr = PNMB(na, slot + sj, &paddr);
4109                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4110                         /* Update descriptor and the cached value */
4111                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4112                         rxbuf->addr = htole64(paddr);
4113                         continue;
4114                 }
4115 #endif /* DEV_NETMAP */
4116                 rxbuf->flags = 0; 
4117                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
4118                     M_PKTHDR, adapter->rx_mbuf_sz);
4119                 if (rxbuf->buf == NULL) {
4120                         error = ENOBUFS;
4121                         goto fail;
4122                 }
4123                 mp = rxbuf->buf;
4124                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4125                 /* Get the memory mapping */
4126                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4127                     rxbuf->pmap, mp, seg,
4128                     &nsegs, BUS_DMA_NOWAIT);
4129                 if (error != 0)
4130                         goto fail;
4131                 bus_dmamap_sync(rxr->ptag,
4132                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4133                 /* Update the descriptor and the cached value */
4134                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
4135                 rxbuf->addr = htole64(seg[0].ds_addr);
4136         }
4137
4138
4139         /* Setup our descriptor indices */
4140         rxr->next_to_check = 0;
4141         rxr->next_to_refresh = 0;
4142         rxr->lro_enabled = FALSE;
4143         rxr->rx_copies = 0;
4144         rxr->rx_bytes = 0;
4145         rxr->discard = FALSE;
4146         rxr->vtag_strip = FALSE;
4147
4148         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4149             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4150
4151         /*
4152         ** Now set up the LRO interface:
4153         */
4154         if (ixgbe_rsc_enable)
4155                 ixgbe_setup_hw_rsc(rxr);
4156         else if (ifp->if_capenable & IFCAP_LRO) {
4157                 int err = tcp_lro_init(lro);
4158                 if (err) {
4159                         device_printf(dev, "LRO Initialization failed!\n");
4160                         goto fail;
4161                 }
4162                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4163                 rxr->lro_enabled = TRUE;
4164                 lro->ifp = adapter->ifp;
4165         }
4166
4167         IXGBE_RX_UNLOCK(rxr);
4168         return (0);
4169
4170 fail:
4171         ixgbe_free_receive_ring(rxr);
4172         IXGBE_RX_UNLOCK(rxr);
4173         return (error);
4174 }
4175
4176 /*********************************************************************
4177  *
4178  *  Initialize all receive rings.
4179  *
4180  **********************************************************************/
4181 static int
4182 ixgbe_setup_receive_structures(struct adapter *adapter)
4183 {
4184         struct rx_ring *rxr = adapter->rx_rings;
4185         int j;
4186
4187         for (j = 0; j < adapter->num_queues; j++, rxr++)
4188                 if (ixgbe_setup_receive_ring(rxr))
4189                         goto fail;
4190
4191         return (0);
4192 fail:
4193         /*
4194          * Free RX buffers allocated so far; we will only handle
4195          * the rings that completed, since the failing case will have
4196          * cleaned up for itself. 'j' failed, so it's the terminus.
4197          */
4198         for (int i = 0; i < j; ++i) {
4199                 rxr = &adapter->rx_rings[i];
4200                 ixgbe_free_receive_ring(rxr);
4201         }
4202
4203         return (ENOBUFS);
4204 }
4205
4206 static void
4207 ixgbe_initialise_rss_mapping(struct adapter *adapter)
4208 {
4209         struct ixgbe_hw *hw = &adapter->hw;
4210         uint32_t reta;
4211         int i, j, queue_id;
4212         uint32_t rss_key[10];
4213         uint32_t mrqc;
4214 #ifdef  RSS
4215         uint32_t rss_hash_config;
4216 #endif
4217
4218         /* Setup RSS */
4219         reta = 0;
4220
4221 #ifdef  RSS
4222         /* Fetch the configured RSS key */
4223         rss_getkey((uint8_t *) &rss_key);
4224 #else
4225         /* set up random bits */
4226         arc4rand(&rss_key, sizeof(rss_key), 0);
4227 #endif
4228
4229         /* Set up the redirection table */
4230         for (i = 0, j = 0; i < 128; i++, j++) {
4231                 if (j == adapter->num_queues) j = 0;
4232 #ifdef  RSS
4233                 /*
4234                  * Fetch the RSS bucket id for the given indirection entry.
4235                  * Cap it at the number of configured buckets (which is
4236                  * num_queues.)
4237                  */
4238                 queue_id = rss_get_indirection_to_bucket(i);
4239                 queue_id = queue_id % adapter->num_queues;
4240 #else
4241                 queue_id = (j * 0x11);
4242 #endif
4243                 /*
4244                  * The low 8 bits are for hash value (n+0);
4245                  * The next 8 bits are for hash value (n+1), etc.
4246                  */
4247                 reta = reta >> 8;
4248                 reta = reta | ( ((uint32_t) queue_id) << 24);
4249                 if ((i & 3) == 3) {
4250                         IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4251                         reta = 0;
4252                 }
4253         }
4254
4255         /* Now fill our hash function seeds */
4256         for (int i = 0; i < 10; i++)
4257                 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
4258
4259         /* Perform hash on these packet types */
4260 #ifdef  RSS
4261         mrqc = IXGBE_MRQC_RSSEN;
4262         rss_hash_config = rss_gethashconfig();
4263         if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
4264                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
4265         if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
4266                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
4267         if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
4268                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
4269         if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
4270                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
4271         if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
4272                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
4273         if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
4274                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
4275         if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
4276                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
4277         if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
4278                 device_printf(adapter->dev,
4279                     "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
4280                     "but not supported\n", __func__);
4281         if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
4282                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
4283         if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
4284                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4285 #else
4286         /*
4287          * Disable UDP - IP fragments aren't currently being handled
4288          * and so we end up with a mix of 2-tuple and 4-tuple
4289          * traffic.
4290          */
4291         mrqc = IXGBE_MRQC_RSSEN
4292              | IXGBE_MRQC_RSS_FIELD_IPV4
4293              | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4294 #if 0
4295              | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4296 #endif
4297              | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4298              | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4299              | IXGBE_MRQC_RSS_FIELD_IPV6
4300              | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4301 #if 0
4302              | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4303              | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP
4304 #endif
4305         ;
4306 #endif /* RSS */
4307         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4308 }
4309
4310
4311 /*********************************************************************
4312  *
4313  *  Setup receive registers and features.
4314  *
4315  **********************************************************************/
4316 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4317
4318 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4319         
4320 static void
4321 ixgbe_initialize_receive_units(struct adapter *adapter)
4322 {
4323         struct  rx_ring *rxr = adapter->rx_rings;
4324         struct ixgbe_hw *hw = &adapter->hw;
4325         struct ifnet   *ifp = adapter->ifp;
4326         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4327         u32             hlreg;
4328
4329
4330         /*
4331          * Make sure receives are disabled while
4332          * setting up the descriptor ring
4333          */
4334         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4335         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4336             rxctrl & ~IXGBE_RXCTRL_RXEN);
4337
4338         /* Enable broadcasts */
4339         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4340         fctrl |= IXGBE_FCTRL_BAM;
4341         fctrl |= IXGBE_FCTRL_DPF;
4342         fctrl |= IXGBE_FCTRL_PMCF;
4343         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4344
4345         /* Set for Jumbo Frames? */
4346         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4347         if (ifp->if_mtu > ETHERMTU)
4348                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4349         else
4350                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4351 #ifdef DEV_NETMAP
4352         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4353         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4354                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4355         else
4356                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4357 #endif /* DEV_NETMAP */
4358         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4359
4360         bufsz = (adapter->rx_mbuf_sz +
4361             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
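        /*
        ** SRRCTL expresses the packet buffer size in 1KB units (the
        ** BSIZEPKT shift), so round the mbuf size up to the next
        ** kilobyte before converting.
        */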
4362
4363         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4364                 u64 rdba = rxr->rxdma.dma_paddr;
4365
4366                 /* Setup the Base and Length of the Rx Descriptor Ring */
4367                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4368                                (rdba & 0x00000000ffffffffULL));
4369                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4370                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4371                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4372
4373                 /* Set up the SRRCTL register */
4374                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4375                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4376                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4377                 srrctl |= bufsz;
4378                 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4379                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4380
4381                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4382                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4383                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4384
4385                 /* Set the processing limit */
4386                 rxr->process_limit = ixgbe_rx_process_limit;
4387         }
4388
4389         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4390                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4391                               IXGBE_PSRTYPE_UDPHDR |
4392                               IXGBE_PSRTYPE_IPV4HDR |
4393                               IXGBE_PSRTYPE_IPV6HDR;
4394                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4395         }
4396
4397         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4398
4399         ixgbe_initialise_rss_mapping(adapter);
4400
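        /*
        ** Roughly: PCSD disables the legacy fragment checksum reported in
        ** the Rx descriptor (with RSS that field carries the hash value
        ** instead); when PCSD stays clear, IPPCSE requests the IP payload
        ** checksum there.
        */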
4401         if (adapter->num_queues > 1) {
4402                 /* RSS and RX IPP Checksum are mutually exclusive */
4403                 rxcsum |= IXGBE_RXCSUM_PCSD;
4404         }
4405
4406         if (ifp->if_capenable & IFCAP_RXCSUM)
4407                 rxcsum |= IXGBE_RXCSUM_PCSD;
4408
4409         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4410                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4411
4412         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4413
4414         return;
4415 }
4416
4417 /*********************************************************************
4418  *
4419  *  Free all receive rings.
4420  *
4421  **********************************************************************/
4422 static void
4423 ixgbe_free_receive_structures(struct adapter *adapter)
4424 {
4425         struct rx_ring *rxr = adapter->rx_rings;
4426
4427         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4428
4429         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4430                 struct lro_ctrl         *lro = &rxr->lro;
4431                 ixgbe_free_receive_buffers(rxr);
4432                 /* Free LRO memory */
4433                 tcp_lro_free(lro);
4434                 /* Free the ring memory as well */
4435                 ixgbe_dma_free(adapter, &rxr->rxdma);
4436         }
4437
4438         free(adapter->rx_rings, M_DEVBUF);
4439 }
4440
4441
4442 /*********************************************************************
4443  *
4444  *  Free receive ring data structures
4445  *
4446  **********************************************************************/
4447 static void
4448 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4449 {
4450         struct adapter          *adapter = rxr->adapter;
4451         struct ixgbe_rx_buf     *rxbuf;
4452
4453         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4454
4455         /* Cleanup any existing buffers */
4456         if (rxr->rx_buffers != NULL) {
4457                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4458                         rxbuf = &rxr->rx_buffers[i];
4459                         if (rxbuf->buf != NULL) {
4460                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4461                                     BUS_DMASYNC_POSTREAD);
4462                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4463                                 rxbuf->buf->m_flags |= M_PKTHDR;
4464                                 m_freem(rxbuf->buf);
4465                         }
4466                         rxbuf->buf = NULL;
4467                         if (rxbuf->pmap != NULL) {
4468                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4469                                 rxbuf->pmap = NULL;
4470                         }
4471                 }
4472                 if (rxr->rx_buffers != NULL) {
4473                         free(rxr->rx_buffers, M_DEVBUF);
4474                         rxr->rx_buffers = NULL;
4475                 }
4476         }
4477
4478         if (rxr->ptag != NULL) {
4479                 bus_dma_tag_destroy(rxr->ptag);
4480                 rxr->ptag = NULL;
4481         }
4482
4483         return;
4484 }
4485
4486 static __inline void
4487 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4488 {
4489                  
4490         /*
4491          * At the moment LRO is only for IP/TCP packets whose TCP checksum
4492          * was computed by hardware and which carry no VLAN tag in the
4493          * ethernet header.  For IPv6 we do not yet support extension headers.
4494          */
4495         if (rxr->lro_enabled &&
4496             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4497             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4498             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4499             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4500             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4501             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4502             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4503             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4504                 /*
4505                  * Send to the stack if:
4506                  *  - LRO not enabled, or
4507                  *  - no LRO resources, or
4508                  *  - lro enqueue fails
4509                  */
4510                 if (rxr->lro.lro_cnt != 0)
4511                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4512                                 return;
4513         }
4514         IXGBE_RX_UNLOCK(rxr);
4515         (*ifp->if_input)(ifp, m);
4516         IXGBE_RX_LOCK(rxr);
4517 }
4518
4519 static __inline void
4520 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4521 {
4522         struct ixgbe_rx_buf     *rbuf;
4523
4524         rbuf = &rxr->rx_buffers[i];
4525
4526         if (rbuf->fmp != NULL) {/* Partial chain ? */
4527                 rbuf->fmp->m_flags |= M_PKTHDR;
4528                 m_freem(rbuf->fmp);
4529                 rbuf->fmp = NULL;
4530         }
4531
4532         /*
4533         ** With advanced descriptors the writeback
4534         ** clobbers the buffer addresses, so it's easier
4535         ** to just free the existing mbufs and take
4536         ** the normal refresh path to get new buffers
4537         ** and mappings.
4538         */
4539         if (rbuf->buf) {
4540                 m_free(rbuf->buf);
4541                 rbuf->buf = NULL;
4542         }
4543
4544         rbuf->flags = 0;
4545  
4546         return;
4547 }
4548
4549
4550 /*********************************************************************
4551  *
4552  *  This routine executes in interrupt context. It replenishes
4553  *  the mbufs in the descriptor ring and passes data that has been
4554  *  DMA'd into host memory up to the upper layer.
4555  *
4556  *  We loop at most count times if count is > 0, or until done if
4557  *  count < 0.
4558  *
4559  *  Return TRUE for more work, FALSE for all clean.
4560  *********************************************************************/
4561 static bool
4562 ixgbe_rxeof(struct ix_queue *que)
4563 {
4564         struct adapter          *adapter = que->adapter;
4565         struct rx_ring          *rxr = que->rxr;
4566         struct ifnet            *ifp = adapter->ifp;
4567         struct lro_ctrl         *lro = &rxr->lro;
4568         struct lro_entry        *queued;
4569         int                     i, nextp, processed = 0;
4570         u32                     staterr = 0;
4571         u16                     count = rxr->process_limit;
4572         union ixgbe_adv_rx_desc *cur;
4573         struct ixgbe_rx_buf     *rbuf, *nbuf;
4574         u16                     pkt_info;
4575
4576         IXGBE_RX_LOCK(rxr);
4577
4578 #ifdef DEV_NETMAP
4579         /* Same as the txeof routine: wakeup clients on intr. */
4580         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4581                 IXGBE_RX_UNLOCK(rxr);
4582                 return (FALSE);
4583         }
4584 #endif /* DEV_NETMAP */
4585
4586         for (i = rxr->next_to_check; count != 0;) {
4587                 struct mbuf     *sendmp, *mp;
4588                 u32             rsc, ptype;
4589                 u16             len;
4590                 u16             vtag = 0;
4591                 bool            eop;
4592  
4593                 /* Sync the ring. */
4594                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4595                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4596
4597                 cur = &rxr->rx_base[i];
4598                 staterr = le32toh(cur->wb.upper.status_error);
4599                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
4600
4601                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4602                         break;
4603                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4604                         break;
4605
4606                 count--;
4607                 sendmp = NULL;
4608                 nbuf = NULL;
4609                 rsc = 0;
4610                 cur->wb.upper.status_error = 0;
4611                 rbuf = &rxr->rx_buffers[i];
4612                 mp = rbuf->buf;
4613
4614                 len = le16toh(cur->wb.upper.length);
4615                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4616                     IXGBE_RXDADV_PKTTYPE_MASK;
4617                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4618
4619                 /* Make sure bad packets are discarded */
4620                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4621                     (rxr->discard)) {
4622                         rxr->rx_discarded++;
4623                         if (eop)
4624                                 rxr->discard = FALSE;
4625                         else
4626                                 rxr->discard = TRUE;
4627                         ixgbe_rx_discard(rxr, i);
4628                         goto next_desc;
4629                 }
4630
4631                 /*
4632                 ** On 82599, which supports a hardware
4633                 ** LRO (called HW RSC), packets need
4634                 ** not occupy strictly sequential
4635                 ** descriptors; instead the next descriptor
4636                 ** is indicated in bits of this descriptor.
4637                 ** This also means that we might process
4638                 ** more than one packet at a time, something
4639                 ** that was never true before; it required
4640                 ** eliminating the global chain pointers
4641                 ** in favor of what we are doing here.  -jfv
4642                 */
4643                 if (!eop) {
4644                         /*
4645                         ** Figure out the next descriptor
4646                         ** of this frame.
4647                         */
4648                         if (rxr->hw_rsc == TRUE) {
4649                                 rsc = ixgbe_rsc_count(cur);
4650                                 rxr->rsc_num += (rsc - 1);
4651                         }
4652                         if (rsc) { /* Get hardware index */
4653                                 nextp = ((staterr &
4654                                     IXGBE_RXDADV_NEXTP_MASK) >>
4655                                     IXGBE_RXDADV_NEXTP_SHIFT);
4656                         } else { /* Just sequential */
4657                                 nextp = i + 1;
4658                                 if (nextp == adapter->num_rx_desc)
4659                                         nextp = 0;
4660                         }
4661                         nbuf = &rxr->rx_buffers[nextp];
4662                         prefetch(nbuf);
4663                 }
4664                 /*
4665                 ** Rather than using the fmp/lmp global pointers
4666                 ** we now keep the head of a packet chain in the
4667                 ** buffer struct and pass this along from one
4668                 ** descriptor to the next, until we get EOP.
4669                 */
4670                 mp->m_len = len;
4671                 /*
4672                 ** See if there is a stored head of a chain
4673                 ** that this descriptor continues
4674                 */
4675                 sendmp = rbuf->fmp;
4676                 if (sendmp != NULL) {  /* secondary frag */
4677                         rbuf->buf = rbuf->fmp = NULL;
4678                         mp->m_flags &= ~M_PKTHDR;
4679                         sendmp->m_pkthdr.len += mp->m_len;
4680                 } else {
4681                         /*
4682                          * Optimize.  This might be a small packet,
4683                          * maybe just a TCP ACK.  Do a fast copy that
4684                          * is cache aligned into a new mbuf, and
4685                          * leave the old mbuf+cluster for re-use.
4686                          */
4687                         if (eop && len <= IXGBE_RX_COPY_LEN) {
4688                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4689                                 if (sendmp != NULL) {
4690                                         sendmp->m_data +=
4691                                             IXGBE_RX_COPY_ALIGN;
4692                                         ixgbe_bcopy(mp->m_data,
4693                                             sendmp->m_data, len);
4694                                         sendmp->m_len = len;
4695                                         rxr->rx_copies++;
4696                                         rbuf->flags |= IXGBE_RX_COPY;
4697                                 }
4698                         }
4699                         if (sendmp == NULL) {
4700                                 rbuf->buf = rbuf->fmp = NULL;
4701                                 sendmp = mp;
4702                         }
4703
4704                         /* first desc of a non-ps chain */
4705                         sendmp->m_flags |= M_PKTHDR;
4706                         sendmp->m_pkthdr.len = mp->m_len;
4707                 }
4708                 ++processed;
4709
4710                 /* Pass the head pointer on */
4711                 if (eop == 0) {
4712                         nbuf->fmp = sendmp;
4713                         sendmp = NULL;
4714                         mp->m_next = nbuf->buf;
4715                 } else { /* Sending this frame */
4716                         sendmp->m_pkthdr.rcvif = ifp;
4717                         rxr->rx_packets++;
4718                         /* capture data for AIM */
4719                         rxr->bytes += sendmp->m_pkthdr.len;
4720                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4721                         /* Process vlan info */
4722                         if ((rxr->vtag_strip) &&
4723                             (staterr & IXGBE_RXD_STAT_VP))
4724                                 vtag = le16toh(cur->wb.upper.vlan);
4725                         if (vtag) {
4726                                 sendmp->m_pkthdr.ether_vtag = vtag;
4727                                 sendmp->m_flags |= M_VLANTAG;
4728                         }
4729                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4730                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4731 #if __FreeBSD_version >= 800000
4732 #ifdef RSS
4733                         sendmp->m_pkthdr.flowid =
4734                             le32toh(cur->wb.lower.hi_dword.rss);
4735                         sendmp->m_flags |= M_FLOWID;
4736                         switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
4737                         case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
4738                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
4739                                 break;
4740                         case IXGBE_RXDADV_RSSTYPE_IPV4:
4741                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
4742                                 break;
4743                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
4744                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
4745                                 break;
4746                         case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
4747                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
4748                                 break;
4749                         case IXGBE_RXDADV_RSSTYPE_IPV6:
4750                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
4751                                 break;
4752                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
4753                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
4754                                 break;
4755                         case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
4756                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
4757                                 break;
4758                         case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
4759                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
4760                                 break;
4761                         case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
4762                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
4763                                 break;
4764                         default:
4765                                 /* XXX fallthrough */
4766                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_NONE);
4767                         }
4768 #else /* RSS */
4769                         sendmp->m_pkthdr.flowid = que->msix;
4770                         sendmp->m_flags |= M_FLOWID;
4771 #endif /* RSS */
4772 #endif /* FreeBSD_version */
4773                 }
4774 next_desc:
4775                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4776                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4777
4778                 /* Advance our pointers to the next descriptor. */
4779                 if (++i == rxr->num_desc)
4780                         i = 0;
4781
4782                 /* Now send to the stack or do LRO */
4783                 if (sendmp != NULL) {
4784                         rxr->next_to_check = i;
4785                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4786                         i = rxr->next_to_check;
4787                 }
4788
4789                 /* Every 8 descriptors we go to refresh mbufs */
4790                 if (processed == 8) {
4791                         ixgbe_refresh_mbufs(rxr, i);
4792                         processed = 0;
4793                 }
4794         }
4795
4796         /* Refresh any remaining buf structs */
4797         if (ixgbe_rx_unrefreshed(rxr))
4798                 ixgbe_refresh_mbufs(rxr, i);
4799
4800         rxr->next_to_check = i;
4801
4802         /*
4803          * Flush any outstanding LRO work
4804          */
4805         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4806                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4807                 tcp_lro_flush(lro, queued);
4808         }
4809
4810         IXGBE_RX_UNLOCK(rxr);
4811
4812         /*
4813         ** Still have cleaning to do?
4814         */
4815         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
4816                 return (TRUE);
4817         else
4818                 return (FALSE);
4819 }
4820
4821
4822 /*********************************************************************
4823  *
4824  *  Verify that the hardware indicated that the checksum is valid.
4825  *  Inform the stack about the status of checksum so that stack
4826  *  Inform the stack about the status of the checksum so that the
4827  *  stack doesn't spend time verifying it.
4828  *********************************************************************/
4829 static void
4830 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4831 {
4832         u16     status = (u16) staterr;
4833         u8      errors = (u8) (staterr >> 24);
4834         bool    sctp = FALSE;
4835
4836         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4837             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4838                 sctp = TRUE;
4839
4840         if (status & IXGBE_RXD_STAT_IPCS) {
4841                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4842                         /* IP Checksum Good */
4843                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4844                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4845
4846                 } else
4847                         mp->m_pkthdr.csum_flags = 0;
4848         }
4849         if (status & IXGBE_RXD_STAT_L4CS) {
4850                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
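                /*
                ** CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with a
                ** csum_data of 0xffff tells the stack the L4 checksum,
                ** pseudo-header included, has already been verified.
                */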
4851 #if __FreeBSD_version >= 800000
4852                 if (sctp)
4853                         type = CSUM_SCTP_VALID;
4854 #endif
4855                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4856                         mp->m_pkthdr.csum_flags |= type;
4857                         if (!sctp)
4858                                 mp->m_pkthdr.csum_data = htons(0xffff);
4859                 } 
4860         }
4861         return;
4862 }
4863
4864
4865 /*
4866 ** This routine is run via an vlan config EVENT,
4867 ** it enables us to use the HW Filter table since
4868 ** we can get the vlan id. This just creates the
4869 ** entry in the soft version of the VFTA, init will
4870 ** repopulate the real table.
4871 */
4872 static void
4873 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4874 {
4875         struct adapter  *adapter = ifp->if_softc;
4876         u16             index, bit;
4877
4878         if (ifp->if_softc !=  arg)   /* Not our event */
4879                 return;
4880
4881         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4882                 return;
4883
4884         IXGBE_CORE_LOCK(adapter);
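        /*
        ** The VFTA is a 4096-bit table (128 32-bit registers, one bit
        ** per VLAN id): 'index' selects the register, 'bit' the bit.
        */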
4885         index = (vtag >> 5) & 0x7F;
4886         bit = vtag & 0x1F;
4887         adapter->shadow_vfta[index] |= (1 << bit);
4888         ++adapter->num_vlans;
4889         ixgbe_setup_vlan_hw_support(adapter);
4890         IXGBE_CORE_UNLOCK(adapter);
4891 }
4892
4893 /*
4894 ** This routine is run via a vlan
4895 ** unconfig EVENT; it removes our entry
4896 ** from the soft vfta.
4897 */
4898 static void
4899 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4900 {
4901         struct adapter  *adapter = ifp->if_softc;
4902         u16             index, bit;
4903
4904         if (ifp->if_softc !=  arg)
4905                 return;
4906
4907         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4908                 return;
4909
4910         IXGBE_CORE_LOCK(adapter);
4911         index = (vtag >> 5) & 0x7F;
4912         bit = vtag & 0x1F;
4913         adapter->shadow_vfta[index] &= ~(1 << bit);
4914         --adapter->num_vlans;
4915         /* Re-init to load the changes */
4916         ixgbe_setup_vlan_hw_support(adapter);
4917         IXGBE_CORE_UNLOCK(adapter);
4918 }
4919
4920 static void
4921 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4922 {
4923         struct ifnet    *ifp = adapter->ifp;
4924         struct ixgbe_hw *hw = &adapter->hw;
4925         struct rx_ring  *rxr;
4926         u32             ctrl;
4927
4928
4929         /*
4930         ** We get here thru init_locked, meaning
4931         ** We get here through init_locked, meaning
4932         ** a soft reset; this has already cleared
4933         ** the VFTA and other state, so if no vlans
4934         ** have been registered, do nothing.
4935         if (adapter->num_vlans == 0)
4936                 return;
4937
4938         /* Setup the queues for vlans */
4939         for (int i = 0; i < adapter->num_queues; i++) {
4940                 rxr = &adapter->rx_rings[i];
4941                 /* On 82599 the VLAN enable is per/queue in RXDCTL */
4942                 if (hw->mac.type != ixgbe_mac_82598EB) {
4943                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4944                         ctrl |= IXGBE_RXDCTL_VME;
4945                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4946                 }
4947                 rxr->vtag_strip = TRUE;
4948         }
4949
4950         if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
4951                 return;
4952         /*
4953         ** A soft reset zeroes out the VFTA, so
4954         ** we need to repopulate it now.
4955         */
4956         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4957                 if (adapter->shadow_vfta[i] != 0)
4958                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4959                             adapter->shadow_vfta[i]);
4960
4961         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4962         /* Enable the Filter Table if enabled */
4963         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4964                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4965                 ctrl |= IXGBE_VLNCTRL_VFE;
4966         }
4967         if (hw->mac.type == ixgbe_mac_82598EB)
4968                 ctrl |= IXGBE_VLNCTRL_VME;
4969         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4970 }
4971
4972 static void
4973 ixgbe_enable_intr(struct adapter *adapter)
4974 {
4975         struct ixgbe_hw *hw = &adapter->hw;
4976         struct ix_queue *que = adapter->queues;
4977         u32             mask, fwsm;
4978
4979         mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4980         /* Enable Fan Failure detection */
4981         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4982                 mask |= IXGBE_EIMS_GPI_SDP1;
4983
4984         switch (adapter->hw.mac.type) {
4985                 case ixgbe_mac_82599EB:
4986                         mask |= IXGBE_EIMS_ECC;
4987                         mask |= IXGBE_EIMS_GPI_SDP0;
4988                         mask |= IXGBE_EIMS_GPI_SDP1;
4989                         mask |= IXGBE_EIMS_GPI_SDP2;
4990 #ifdef IXGBE_FDIR
4991                         mask |= IXGBE_EIMS_FLOW_DIR;
4992 #endif
4993                         break;
4994                 case ixgbe_mac_X540:
4995                         mask |= IXGBE_EIMS_ECC;
4996                         /* Detect if Thermal Sensor is enabled */
4997                         fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
4998                         if (fwsm & IXGBE_FWSM_TS_ENABLED)
4999                                 mask |= IXGBE_EIMS_TS;
5000 #ifdef IXGBE_FDIR
5001                         mask |= IXGBE_EIMS_FLOW_DIR;
5002 #endif
5003                 /* falls through */
5004                 default:
5005                         break;
5006         }
5007
5008         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
5009
5010         /* With RSS we use auto clear */
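        /*
        ** Causes set in EIAC are cleared automatically when their MSIX
        ** vector fires; link (LSC) and "other" are excluded below so
        ** they stay manually acknowledged.
        */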
5011         if (adapter->msix_mem) {
5012                 mask = IXGBE_EIMS_ENABLE_MASK;
5013                 /* Don't autoclear Link */
5014                 mask &= ~IXGBE_EIMS_OTHER;
5015                 mask &= ~IXGBE_EIMS_LSC;
5016                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
5017         }
5018
5019         /*
5020         ** Now enable all queues; this is done separately to
5021         ** allow handling of the extended (beyond 32) MSIX
5022         ** vectors that can be used by the 82599.
5023         */
5024         for (int i = 0; i < adapter->num_queues; i++, que++)
5025                 ixgbe_enable_queue(adapter, que->msix);
5026
5027         IXGBE_WRITE_FLUSH(hw);
5028
5029         return;
5030 }
5031
5032 static void
5033 ixgbe_disable_intr(struct adapter *adapter)
5034 {
5035         if (adapter->msix_mem)
5036                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
5037         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
5038                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
5039         } else {
5040                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
5041                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
5042                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
5043         }
5044         IXGBE_WRITE_FLUSH(&adapter->hw);
5045         return;
5046 }
5047
5048 u16
5049 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
5050 {
5051         u16 value;
5052
5053         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
5054             reg, 2);
5055
5056         return (value);
5057 }
5058
5059 void
5060 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
5061 {
5062         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
5063             reg, value, 2);
5064
5065         return;
5066 }
5067
5068 /*
5069 ** Get the width and transaction speed of
5070 ** the slot this adapter is plugged into.
5071 */
5072 static void
5073 ixgbe_get_slot_info(struct ixgbe_hw *hw)
5074 {
5075         device_t                dev = ((struct ixgbe_osdep *)hw->back)->dev;
5076         struct ixgbe_mac_info   *mac = &hw->mac;
5077         u16                     link;
5078         u32                     offset;
5079
5080         /* For most devices simply call the shared code routine */
5081         if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
5082                 ixgbe_get_bus_info(hw);
5083                 goto display;
5084         }
5085
5086         /*
5087         ** For the Quad port adapter we need to parse back
5088         ** up the PCI tree to find the speed of the expansion
5089         ** slot into which this adapter is plugged. A bit more work.
5090         */
5091         dev = device_get_parent(device_get_parent(dev));
5092 #ifdef IXGBE_DEBUG
5093         device_printf(dev, "parent pcib = %x,%x,%x\n",
5094             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
5095 #endif
5096         dev = device_get_parent(device_get_parent(dev));
5097 #ifdef IXGBE_DEBUG
5098         device_printf(dev, "slot pcib = %x,%x,%x\n",
5099             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
5100 #endif
5101         /* Now get the PCI Express Capabilities offset */
5102         pci_find_cap(dev, PCIY_EXPRESS, &offset);
5103         /* ...and read the Link Status Register */
5104         link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
5105         switch (link & IXGBE_PCI_LINK_WIDTH) {
5106         case IXGBE_PCI_LINK_WIDTH_1:
5107                 hw->bus.width = ixgbe_bus_width_pcie_x1;
5108                 break;
5109         case IXGBE_PCI_LINK_WIDTH_2:
5110                 hw->bus.width = ixgbe_bus_width_pcie_x2;
5111                 break;
5112         case IXGBE_PCI_LINK_WIDTH_4:
5113                 hw->bus.width = ixgbe_bus_width_pcie_x4;
5114                 break;
5115         case IXGBE_PCI_LINK_WIDTH_8:
5116                 hw->bus.width = ixgbe_bus_width_pcie_x8;
5117                 break;
5118         default:
5119                 hw->bus.width = ixgbe_bus_width_unknown;
5120                 break;
5121         }
5122
5123         switch (link & IXGBE_PCI_LINK_SPEED) {
5124         case IXGBE_PCI_LINK_SPEED_2500:
5125                 hw->bus.speed = ixgbe_bus_speed_2500;
5126                 break;
5127         case IXGBE_PCI_LINK_SPEED_5000:
5128                 hw->bus.speed = ixgbe_bus_speed_5000;
5129                 break;
5130         case IXGBE_PCI_LINK_SPEED_8000:
5131                 hw->bus.speed = ixgbe_bus_speed_8000;
5132                 break;
5133         default:
5134                 hw->bus.speed = ixgbe_bus_speed_unknown;
5135                 break;
5136         }
5137
5138         mac->ops.set_lan_id(hw);
5139
5140 display:
5141         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
5142             ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
5143             (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
5144             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
5145             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
5146             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
5147             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
5148             ("Unknown"));
5149
5150         if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
5151             ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
5152             (hw->bus.speed == ixgbe_bus_speed_2500))) {
5153                 device_printf(dev, "PCI-Express bandwidth available"
5154                     " for this card\n     is not sufficient for"
5155                     " optimal performance.\n");
5156                 device_printf(dev, "For optimal performance a x8 "
5157                     "PCIE, or x4 PCIE Gen2 slot is required.\n");
5158         }
5159         if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
5160             ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
5161             (hw->bus.speed < ixgbe_bus_speed_8000))) {
5162                 device_printf(dev, "PCI-Express bandwidth available"
5163                     " for this card\n     is not sufficient for"
5164                     " optimal performance.\n");
5165                 device_printf(dev, "For optimal performance a x8 "
5166                     "PCIE Gen3 slot is required.\n");
5167         }
5168
5169         return;
5170 }
5171
5172
5173 /*
5174 ** Setup the correct IVAR register for a particular MSIX interrupt
5175 **   (yes this is all very magic and confusing :)
5176 **  - entry is the register array entry
5177 **  - vector is the MSIX vector for this queue
5178 **  - type is RX/TX/MISC
5179 */
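     /*
     ** Rough layout: every IVAR register holds four 8-bit cause-to-vector
     ** entries.  On 82598, RX causes occupy entries 0-63 and TX 64-127
     ** (hence the type * 64 offset); on 82599/X540 each IVAR(n) carries
     ** the RX/TX pair for queues 2n and 2n+1, with IVAR_MISC covering the
     ** non-queue causes.
     */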
5180 static void
5181 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
5182 {
5183         struct ixgbe_hw *hw = &adapter->hw;
5184         u32 ivar, index;
5185
5186         vector |= IXGBE_IVAR_ALLOC_VAL;
5187
5188         switch (hw->mac.type) {
5189
5190         case ixgbe_mac_82598EB:
5191                 if (type == -1)
5192                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
5193                 else
5194                         entry += (type * 64);
5195                 index = (entry >> 2) & 0x1F;
5196                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5197                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5198                 ivar |= (vector << (8 * (entry & 0x3)));
5199                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5200                 break;
5201
5202         case ixgbe_mac_82599EB:
5203         case ixgbe_mac_X540:
5204                 if (type == -1) { /* MISC IVAR */
5205                         index = (entry & 1) * 8;
5206                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5207                         ivar &= ~(0xFF << index);
5208                         ivar |= (vector << index);
5209                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5210                 } else {        /* RX/TX IVARS */
5211                         index = (16 * (entry & 1)) + (8 * type);
5212                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5213                         ivar &= ~(0xFF << index);
5214                         ivar |= (vector << index);
5215                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5216                 }
5217                 break;
5218         default:
5219                 break;
5220         }
5221 }
5222
5223 static void
5224 ixgbe_configure_ivars(struct adapter *adapter)
5225 {
5226         struct  ix_queue *que = adapter->queues;
5227         u32 newitr;
5228
5229         if (ixgbe_max_interrupt_rate > 0)
5230                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5231         else
5232                 newitr = 0;
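        /*
        ** The ITR interval field sits at bits 3..11 and is treated as
        ** 2-usec units, so 4000000 / rate yields the value already
        ** shifted into place (e.g. 8000 ints/sec -> ~124 usec between
        ** interrupts).
        */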
5233
5234         for (int i = 0; i < adapter->num_queues; i++, que++) {
5235                 /* First the RX queue entry */
5236                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5237                 /* ... and the TX */
5238                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5239                 /* Set an Initial EITR value */
5240                 IXGBE_WRITE_REG(&adapter->hw,
5241                     IXGBE_EITR(que->msix), newitr);
5242         }
5243
5244         /* For the Link interrupt */
5245         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5246 }
5247
5248 /*
5249 ** ixgbe_sfp_probe - called in the local timer to
5250 ** determine whether a port has had optics inserted.
5251 */  
5252 static bool ixgbe_sfp_probe(struct adapter *adapter)
5253 {
5254         struct ixgbe_hw *hw = &adapter->hw;
5255         device_t        dev = adapter->dev;
5256         bool            result = FALSE;
5257
5258         if ((hw->phy.type == ixgbe_phy_nl) &&
5259             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5260                 s32 ret = hw->phy.ops.identify_sfp(hw);
5261                 if (ret)
5262                         goto out;
5263                 ret = hw->phy.ops.reset(hw);
5264                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5265                         device_printf(dev,"Unsupported SFP+ module detected!");
5266                         printf(" Reload driver with supported module.\n");
5267                         adapter->sfp_probe = FALSE;
5268                         goto out;
5269                 } else
5270                         device_printf(dev,"SFP+ module detected!\n");
5271                 /* We now have supported optics */
5272                 adapter->sfp_probe = FALSE;
5273                 /* Set the optics type so system reports correctly */
5274                 ixgbe_setup_optics(adapter);
5275                 result = TRUE;
5276         }
5277 out:
5278         return (result);
5279 }
5280
5281 /*
5282 ** Tasklet handler for MSIX Link interrupts
5283 **  - done outside the interrupt context since it might sleep
5284 */
5285 static void
5286 ixgbe_handle_link(void *context, int pending)
5287 {
5288         struct adapter  *adapter = context;
5289
5290         ixgbe_check_link(&adapter->hw,
5291             &adapter->link_speed, &adapter->link_up, 0);
5292         ixgbe_update_link_status(adapter);
5293 }
5294
5295 /*
5296 ** Tasklet for handling SFP module interrupts
5297 */
5298 static void
5299 ixgbe_handle_mod(void *context, int pending)
5300 {
5301         struct adapter  *adapter = context;
5302         struct ixgbe_hw *hw = &adapter->hw;
5303         device_t        dev = adapter->dev;
5304         u32 err;
5305
5306         err = hw->phy.ops.identify_sfp(hw);
5307         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5308                 device_printf(dev,
5309                     "Unsupported SFP+ module type was detected.\n");
5310                 return;
5311         }
5312         err = hw->mac.ops.setup_sfp(hw);
5313         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5314                 device_printf(dev,
5315                     "Setup failure - unsupported SFP+ module type.\n");
5316                 return;
5317         }
5318         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5319         return;
5320 }
5321
5322
5323 /*
5324 ** Tasklet for handling MSF (multispeed fiber) interrupts
5325 */
5326 static void
5327 ixgbe_handle_msf(void *context, int pending)
5328 {
5329         struct adapter  *adapter = context;
5330         struct ixgbe_hw *hw = &adapter->hw;
5331         u32 autoneg;
5332         bool negotiate;
5333
5334         autoneg = hw->phy.autoneg_advertised;
5335         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5336                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5337         if (hw->mac.ops.setup_link)
5338                 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5339         return;
5340 }
5341
5342 #ifdef IXGBE_FDIR
5343 /*
5344 ** Tasklet for reinitializing the Flow Director filter table
5345 */
5346 static void
5347 ixgbe_reinit_fdir(void *context, int pending)
5348 {
5349         struct adapter  *adapter = context;
5350         struct ifnet   *ifp = adapter->ifp;
5351
5352         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5353                 return;
5354         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5355         adapter->fdir_reinit = 0;
5356         /* re-enable flow director interrupts */
5357         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5358         /* Restart the interface */
5359         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5360         return;
5361 }
5362 #endif
5363
5364 /**********************************************************************
5365  *
5366  *  Update the board statistics counters.
5367  *
5368  **********************************************************************/
5369 static void
5370 ixgbe_update_stats_counters(struct adapter *adapter)
5371 {
5372         struct ifnet   *ifp = adapter->ifp;
5373         struct ixgbe_hw *hw = &adapter->hw;
5374         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5375         u64  total_missed_rx = 0;
5376
5377         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5378         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5379         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5380         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5381
5382         /*
5383         ** Note: these are for the 8 possible traffic classes,
5384         **       which the current implementation does not use,
5385         **       so only index 0 should read real data.
5386         */
5387         for (int i = 0; i < 8; i++) {
5388                 u32 mp;
5389                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5390                 /* missed_rx tallies misses for the gprc workaround */
5391                 missed_rx += mp;
5392                 /* global total per queue */
5393                 adapter->stats.mpc[i] += mp;
5394                 /* Running comprehensive total for stats display */
5395                 total_missed_rx += adapter->stats.mpc[i];
5396                 if (hw->mac.type == ixgbe_mac_82598EB) {
5397                         adapter->stats.rnbc[i] +=
5398                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5399                         adapter->stats.qbtc[i] +=
5400                             IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5401                         adapter->stats.qbrc[i] +=
5402                             IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5403                         adapter->stats.pxonrxc[i] +=
5404                             IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5405                 } else
5406                         adapter->stats.pxonrxc[i] +=
5407                             IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5408                 adapter->stats.pxontxc[i] +=
5409                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5410                 adapter->stats.pxofftxc[i] +=
5411                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5412                 adapter->stats.pxoffrxc[i] +=
5413                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5414                 adapter->stats.pxon2offc[i] +=
5415                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5416         }
5417         for (int i = 0; i < 16; i++) {
5418                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5419                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5420                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5421         }
5422         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5423         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5424         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5425
5426         /* Hardware workaround, gprc counts missed packets */
5427         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5428         adapter->stats.gprc -= missed_rx;
5429
5430         if (hw->mac.type != ixgbe_mac_82598EB) {
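                /*
                ** These octet counters are wider than 32 bits and split
                ** across low/high registers; combine them into the 64-bit
                ** software totals.
                */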
5431                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5432                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5433                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5434                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5435                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5436                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5437                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5438                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5439         } else {
5440                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5441                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5442                 /* 82598 only has a counter in the high register */
5443                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5444                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5445                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5446         }
5447
5448         /*
5449          * Workaround: mprc hardware is incorrectly counting
5450          * broadcasts, so for now we subtract those.
5451          */
5452         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5453         adapter->stats.bprc += bprc;
5454         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5455         if (hw->mac.type == ixgbe_mac_82598EB)
5456                 adapter->stats.mprc -= bprc;
5457
5458         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5459         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5460         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5461         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5462         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5463         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5464
5465         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5466         adapter->stats.lxontxc += lxon;
5467         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5468         adapter->stats.lxofftxc += lxoff;
5469         total = lxon + lxoff;
5470
5471         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5472         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5473         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5474         adapter->stats.gptc -= total;
5475         adapter->stats.mptc -= total;
5476         adapter->stats.ptc64 -= total;
5477         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5478
5479         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5480         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5481         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5482         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5483         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5484         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5485         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5486         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5487         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5488         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5489         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5490         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5491         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5492         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5493         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5494         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5495         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5496         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5497         /* Only read FCOE counters on 82599 and later */
5498         if (hw->mac.type != ixgbe_mac_82598EB) {
5499                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5500                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5501                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5502                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5503                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5504         }
5505
5506         /* Fill out the OS statistics structure */
5507         ifp->if_ipackets = adapter->stats.gprc;
5508         ifp->if_opackets = adapter->stats.gptc;
5509         ifp->if_ibytes = adapter->stats.gorc;
5510         ifp->if_obytes = adapter->stats.gotc;
5511         ifp->if_imcasts = adapter->stats.mprc;
5512         ifp->if_omcasts = adapter->stats.mptc;
5513         ifp->if_collisions = 0;
5514
5515         /* Rx Errors */
5516         ifp->if_iqdrops = total_missed_rx;
5517         ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
5518 }
5519
5520 /** ixgbe_sysctl_tdh_handler - Handler function
5521  *  Retrieves the TDH value from the hardware
5522  */
5523 static int 
5524 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5525 {
5526         int error;
5527
5528         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5529         if (!txr) return 0;
5530
5531         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5532         error = sysctl_handle_int(oidp, &val, 0, req);
5533         if (error || !req->newptr)
5534                 return error;
5535         return 0;
5536 }
5537
5538 /** ixgbe_sysctl_tdt_handler - Handler function
5539  *  Retrieves the TDT value from the hardware
5540  */
5541 static int 
5542 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5543 {
5544         int error;
5545
5546         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5547         if (!txr) return 0;
5548
5549         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5550         error = sysctl_handle_int(oidp, &val, 0, req);
5551         if (error || !req->newptr)
5552                 return error;
5553         return 0;
5554 }
5555
5556 /** ixgbe_sysctl_rdh_handler - Handler function
5557  *  Retrieves the RDH value from the hardware
5558  */
5559 static int 
5560 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5561 {
5562         int error;
5563
5564         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5565         if (!rxr) return 0;
5566
5567         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5568         error = sysctl_handle_int(oidp, &val, 0, req);
5569         if (error || !req->newptr)
5570                 return error;
5571         return 0;
5572 }
5573
5574 /** ixgbe_sysctl_rdt_handler - Handler function
5575  *  Retrieves the RDT value from the hardware
5576  */
5577 static int 
5578 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5579 {
5580         int error;
5581
5582         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5583         if (!rxr) return 0;
5584
5585         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5586         error = sysctl_handle_int(oidp, &val, 0, req);
5587         if (error || !req->newptr)
5588                 return error;
5589         return 0;
5590 }
5591
5592 static int
5593 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5594 {
5595         int error;
5596         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5597         unsigned int reg, usec, rate;
5598
5599         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5600         usec = ((reg & 0x0FF8) >> 3);
5601         if (usec > 0)
5602                 rate = 500000 / usec;
5603         else
5604                 rate = 0;
5605         error = sysctl_handle_int(oidp, &rate, 0, req);
5606         if (error || !req->newptr)
5607                 return error;
5608         reg &= ~0xfff; /* default, no limitation */
5609         ixgbe_max_interrupt_rate = 0;
5610         if (rate > 0 && rate < 500000) {
5611                 if (rate < 1000)
5612                         rate = 1000;
5613                 ixgbe_max_interrupt_rate = rate;
5614                 reg |= ((4000000/rate) & 0xff8 );
5615         }
5616         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5617         return 0;
5618 }
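/*
** A worked example of the EITR arithmetic above, using the per-queue
** interrupt_rate node created in ixgbe_add_hw_stats() (dev.ix.0 is an
** assumed unit):
**
**      sysctl dev.ix.0.queue0.interrupt_rate=31250
**
** 4000000 / 31250 = 128 and 128 & 0xff8 = 128, so 128 is written to
** the EITR interval field. Reading back: usec = 128 >> 3 = 16 and
** 500000 / 16 = 31250 interrupts/sec. Requests whose quotient is not
** a multiple of 8 are rounded by the 0xff8 mask, so the value read
** back may differ slightly from the value written.
*/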
5619
5620 /*
5621  * Add sysctl variables, one per statistic, to the system.
5622  */
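/*
** The resulting per-device tree looks roughly like the following
** (unit 0 assumed for the example paths):
**
**      dev.ix.0.dropped                 driver-level counters
**      dev.ix.0.queue0.tx_packets      per-queue ring statistics
**      dev.ix.0.queue0.interrupt_rate  per-queue EITR control (RW)
**      dev.ix.0.mac_stats.crc_errs     hardware MAC counters
*/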
5623 static void
5624 ixgbe_add_hw_stats(struct adapter *adapter)
5625 {
5626
5627         device_t dev = adapter->dev;
5628
5629         struct tx_ring *txr = adapter->tx_rings;
5630         struct rx_ring *rxr = adapter->rx_rings;
5631
5632         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5633         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5634         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5635         struct ixgbe_hw_stats *stats = &adapter->stats;
5636
5637         struct sysctl_oid *stat_node, *queue_node;
5638         struct sysctl_oid_list *stat_list, *queue_list;
5639
5640 #define QUEUE_NAME_LEN 32
5641         char namebuf[QUEUE_NAME_LEN];
5642
5643         /* Driver Statistics */
5644         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5645                         CTLFLAG_RD, &adapter->dropped_pkts,
5646                         "Driver dropped packets");
5647         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5648                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5649                         "m_defrag() failed");
5650         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5651                         CTLFLAG_RD, &adapter->watchdog_events,
5652                         "Watchdog timeouts");
5653         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5654                         CTLFLAG_RD, &adapter->link_irq,
5655                         "Link MSIX IRQ Handled");
5656
5657         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5658                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5659                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5660                                             CTLFLAG_RD, NULL, "Queue Name");
5661                 queue_list = SYSCTL_CHILDREN(queue_node);
5662
5663                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5664                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5665                                 sizeof(&adapter->queues[i]),
5666                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5667                                 "Interrupt Rate");
5668                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5669                                 CTLFLAG_RD, &(adapter->queues[i].irqs),
5670                                 "irqs on this queue");
5671                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5672                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5673                                 ixgbe_sysctl_tdh_handler, "IU",
5674                                 "Transmit Descriptor Head");
5675                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5676                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5677                                 ixgbe_sysctl_tdt_handler, "IU",
5678                                 "Transmit Descriptor Tail");
5679                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
5680                                 CTLFLAG_RD, &txr->tso_tx,
5681                                 "TSO");
5682                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
5683                                 CTLFLAG_RD, &txr->no_tx_dma_setup,
5684                                 "Driver tx dma failure in xmit");
5685                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5686                                 CTLFLAG_RD, &txr->no_desc_avail,
5687                                 "Queue No Descriptor Available");
5688                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5689                                 CTLFLAG_RD, &txr->total_packets,
5690                                 "Queue Packets Transmitted");
5691         }
5692
5693         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5694                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5695                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5696                                             CTLFLAG_RD, NULL, "Queue Name");
5697                 queue_list = SYSCTL_CHILDREN(queue_node);
5698
5699                 struct lro_ctrl *lro = &rxr->lro;
5700
5706                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5707                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5708                                 ixgbe_sysctl_rdh_handler, "IU",
5709                                 "Receive Descriptor Head");
5710                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5711                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5712                                 ixgbe_sysctl_rdt_handler, "IU",
5713                                 "Receive Descriptor Tail");
5714                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5715                                 CTLFLAG_RD, &rxr->rx_packets,
5716                                 "Queue Packets Received");
5717                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5718                                 CTLFLAG_RD, &rxr->rx_bytes,
5719                                 "Queue Bytes Received");
5720                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
5721                                 CTLFLAG_RD, &rxr->rx_copies,
5722                                 "Copied RX Frames");
5723                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5724                                 CTLFLAG_RD, &lro->lro_queued, 0,
5725                                 "LRO Queued");
5726                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5727                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5728                                 "LRO Flushed");
5729         }
5730
5731         /* MAC stats get their own sub node */
5732
5733         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5734                                     CTLFLAG_RD, NULL, "MAC Statistics");
5735         stat_list = SYSCTL_CHILDREN(stat_node);
5736
5737         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5738                         CTLFLAG_RD, &stats->crcerrs,
5739                         "CRC Errors");
5740         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5741                         CTLFLAG_RD, &stats->illerrc,
5742                         "Illegal Byte Errors");
5743         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5744                         CTLFLAG_RD, &stats->errbc,
5745                         "Byte Errors");
5746         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5747                         CTLFLAG_RD, &stats->mspdc,
5748                         "MAC Short Packets Discarded");
5749         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5750                         CTLFLAG_RD, &stats->mlfc,
5751                         "MAC Local Faults");
5752         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5753                         CTLFLAG_RD, &stats->mrfc,
5754                         "MAC Remote Faults");
5755         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5756                         CTLFLAG_RD, &stats->rlec,
5757                         "Receive Length Errors");
5758
5759         /* Flow Control stats */
5760         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5761                         CTLFLAG_RD, &stats->lxontxc,
5762                         "Link XON Transmitted");
5763         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5764                         CTLFLAG_RD, &stats->lxonrxc,
5765                         "Link XON Received");
5766         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5767                         CTLFLAG_RD, &stats->lxofftxc,
5768                         "Link XOFF Transmitted");
5769         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5770                         CTLFLAG_RD, &stats->lxoffrxc,
5771                         "Link XOFF Received");
5772
5773         /* Packet Reception Stats */
5774         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5775                         CTLFLAG_RD, &stats->tor, 
5776                         "Total Octets Received"); 
5777         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5778                         CTLFLAG_RD, &stats->gorc, 
5779                         "Good Octets Received"); 
5780         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5781                         CTLFLAG_RD, &stats->tpr,
5782                         "Total Packets Received");
5783         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5784                         CTLFLAG_RD, &stats->gprc,
5785                         "Good Packets Received");
5786         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5787                         CTLFLAG_RD, &stats->mprc,
5788                         "Multicast Packets Received");
5789         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5790                         CTLFLAG_RD, &stats->bprc,
5791                         "Broadcast Packets Received");
5792         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5793                         CTLFLAG_RD, &stats->prc64,
5794                         "64 byte frames received ");
5795         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5796                         CTLFLAG_RD, &stats->prc127,
5797                         "65-127 byte frames received");
5798         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5799                         CTLFLAG_RD, &stats->prc255,
5800                         "128-255 byte frames received");
5801         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5802                         CTLFLAG_RD, &stats->prc511,
5803                         "256-511 byte frames received");
5804         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5805                         CTLFLAG_RD, &stats->prc1023,
5806                         "512-1023 byte frames received");
5807         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5808                         CTLFLAG_RD, &stats->prc1522,
5809                         "1023-1522 byte frames received");
5810         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5811                         CTLFLAG_RD, &stats->ruc,
5812                         "Receive Undersized");
5813         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5814                         CTLFLAG_RD, &stats->rfc,
5815                         "Fragmented Packets Received ");
5816         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5817                         CTLFLAG_RD, &stats->roc,
5818                         "Oversized Packets Received");
5819         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5820                         CTLFLAG_RD, &stats->rjc,
5821                         "Received Jabber");
5822         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5823                         CTLFLAG_RD, &stats->mngprc,
5824                         "Management Packets Received");
5825         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5826                         CTLFLAG_RD, &stats->mngpdc,
5827                         "Management Packets Dropped");
5828         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5829                         CTLFLAG_RD, &stats->xec,
5830                         "Checksum Errors");
5831
5832         /* Packet Transmission Stats */
5833         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5834                         CTLFLAG_RD, &stats->gotc, 
5835                         "Good Octets Transmitted"); 
5836         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5837                         CTLFLAG_RD, &stats->tpt,
5838                         "Total Packets Transmitted");
5839         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5840                         CTLFLAG_RD, &stats->gptc,
5841                         "Good Packets Transmitted");
5842         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5843                         CTLFLAG_RD, &stats->bptc,
5844                         "Broadcast Packets Transmitted");
5845         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5846                         CTLFLAG_RD, &stats->mptc,
5847                         "Multicast Packets Transmitted");
5848         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5849                         CTLFLAG_RD, &stats->mngptc,
5850                         "Management Packets Transmitted");
5851         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5852                         CTLFLAG_RD, &stats->ptc64,
5853                         "64 byte frames transmitted ");
5854         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5855                         CTLFLAG_RD, &stats->ptc127,
5856                         "65-127 byte frames transmitted");
5857         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5858                         CTLFLAG_RD, &stats->ptc255,
5859                         "128-255 byte frames transmitted");
5860         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5861                         CTLFLAG_RD, &stats->ptc511,
5862                         "256-511 byte frames transmitted");
5863         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5864                         CTLFLAG_RD, &stats->ptc1023,
5865                         "512-1023 byte frames transmitted");
5866         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5867                         CTLFLAG_RD, &stats->ptc1522,
5868                         "1024-1522 byte frames transmitted");
5869 }
5870
5871 /*
5872 ** Set flow control using sysctl:
5873 ** Flow control values:
5874 **      0 - off
5875 **      1 - rx pause
5876 **      2 - tx pause
5877 **      3 - full
5878 */
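/*
** For example, assuming this handler is attached under the device's
** sysctl tree (the OID itself is registered elsewhere in this file,
** so the "fc" name and unit below are illustrative), full flow
** control could be requested with:
**
**      sysctl dev.ix.0.fc=3
**
** Out-of-range values are rejected with EINVAL and the previous
** setting is restored.
*/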
5879 static int
5880 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5881 {
5882         int error, last;
5883         struct adapter *adapter = (struct adapter *) arg1;
5884
5885         last = adapter->fc;
5886         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5887         if ((error) || (req->newptr == NULL))
5888                 return (error);
5889
5890         /* Don't bother if it's not changed */
5891         if (adapter->fc == last)
5892                 return (0);
5893
5894         switch (adapter->fc) {
5895                 case ixgbe_fc_rx_pause:
5896                 case ixgbe_fc_tx_pause:
5897                 case ixgbe_fc_full:
5898                         adapter->hw.fc.requested_mode = adapter->fc;
5899                         if (adapter->num_queues > 1)
5900                                 ixgbe_disable_rx_drop(adapter);
5901                         break;
5902                 case ixgbe_fc_none:
5903                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5904                         if (adapter->num_queues > 1)
5905                                 ixgbe_enable_rx_drop(adapter);
5906                         break;
5907                 default:
5908                         adapter->fc = last;
5909                         return (EINVAL);
5910         }
5911         /* Don't autoneg if forcing a value */
5912         adapter->hw.fc.disable_fc_autoneg = TRUE;
5913         ixgbe_fc_enable(&adapter->hw);
5914         return error;
5915 }
5916
5917 /*
5918 ** Control advertised link speed:
5919 **      1 - advertise only 1G
5920 **      2 - advertise 100Mb (X540 only)
5921 **      3 - advertise normal (1G and 10G)
5922 */
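/*
** For example, to advertise only 1G (the "advertise_speed" name and
** unit are illustrative; the OID is registered elsewhere in this
** file):
**
**      sysctl dev.ix.0.advertise_speed=1
**
** The handler rejects the request unless the PHY is copper or
** multispeed fiber, and 100Mb (value 2) is accepted on X540 only.
*/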
5923 static int
5924 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5925 {
5926         int                     error = 0;
5927         struct adapter          *adapter;
5928         device_t                dev;
5929         struct ixgbe_hw         *hw;
5930         ixgbe_link_speed        speed, last;
5931
5932         adapter = (struct adapter *) arg1;
5933         dev = adapter->dev;
5934         hw = &adapter->hw;
5935         last = adapter->advertise;
5936
5937         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5938         if ((error) || (req->newptr == NULL))
5939                 return (error);
5940
5941         if (adapter->advertise == last) /* no change */
5942                 return (0);
5943
5944         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5945             (hw->phy.multispeed_fiber)))
5946                 return (EINVAL);
5947
5948         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5949                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5950                 return (EINVAL);
5951         }
5952
5953         if (adapter->advertise == 1)
5954                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5955         else if (adapter->advertise == 2)
5956                 speed = IXGBE_LINK_SPEED_100_FULL;
5957         else if (adapter->advertise == 3)
5958                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5959                         IXGBE_LINK_SPEED_10GB_FULL;
5960         else {  /* bogus value */
5961                 adapter->advertise = last;
5962                 return (EINVAL);
5963         }
5964
5965         hw->mac.autotry_restart = TRUE;
5966         hw->mac.ops.setup_link(hw, speed, TRUE);
5967
5968         return (error);
5969 }
5970
5971 /*
5972 ** Thermal Shutdown Trigger
5973 **   - cause a Thermal Overtemp IRQ
5974 **   - this now requires firmware enabling
5975 */
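/*
** For example (illustrative OID name and unit; X540 only, and the
** firmware must have thermal sensing enabled):
**
**      sysctl dev.ix.0.ts=1
**
** Any nonzero write sets IXGBE_EICR_TS in EICS, which raises the
** overtemp interrupt for testing.
*/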
5976 static int
5977 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5978 {
5979         int             error, fire = 0;
5980         struct adapter  *adapter = (struct adapter *) arg1;
5981         struct ixgbe_hw *hw = &adapter->hw;
5982
5984         if (hw->mac.type != ixgbe_mac_X540)
5985                 return (0);
5986
5987         error = sysctl_handle_int(oidp, &fire, 0, req);
5988         if ((error) || (req->newptr == NULL))
5989                 return (error);
5990
5991         if (fire) {
5992                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5993                 reg |= IXGBE_EICR_TS;
5994                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5995         }
5996
5997         return (0);
5998 }
5999
6000 /*
6001 ** Enable the hardware to drop packets when the buffer is
6002 ** full. This is useful with multiqueue, so that no single
6003 ** full queue stalls the entire RX engine. We only enable
6004 ** this when multiqueue is in use AND flow control is
6005 ** disabled.
6006 */
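/*
** These two helpers are driven from ixgbe_set_flowcntl() above:
** drop-enable is set on every queue when flow control is turned off
** on a multiqueue configuration, and cleared again when any flow
** control mode is requested.
*/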
6007 static void
6008 ixgbe_enable_rx_drop(struct adapter *adapter)
6009 {
6010         struct ixgbe_hw *hw = &adapter->hw;
6011
6012         for (int i = 0; i < adapter->num_queues; i++) {
6013                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6014                 srrctl |= IXGBE_SRRCTL_DROP_EN;
6015                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6016         }
6017 }
6018
6019 static void
6020 ixgbe_disable_rx_drop(struct adapter *adapter)
6021 {
6022         struct ixgbe_hw *hw = &adapter->hw;
6023
6024         for (int i = 0; i < adapter->num_queues; i++) {
6025                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6026                 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
6027                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6028         }
6029 }