/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "ixgbe.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char ixgbe_driver_version[] = "2.5.15";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static char    *ixgbe_strings[] = {
        "Intel(R) PRO/10GbE PCI-Express Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t);
static int      ixgbe_attach(device_t);
static int      ixgbe_detach(device_t);
static int      ixgbe_shutdown(device_t);
#ifdef IXGBE_LEGACY_TX
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#else /* ! IXGBE_LEGACY_TX */
static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int      ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     ixgbe_qflush(struct ifnet *);
static void     ixgbe_deferred_mq_start(void *, int);
#endif /* IXGBE_LEGACY_TX */
static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
static void     ixgbe_init(void *);
static void     ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *);
static void     ixgbe_get_slot_info(struct ixgbe_hw *);
static int      ixgbe_allocate_msix(struct adapter *);
static int      ixgbe_allocate_legacy(struct adapter *);
static int      ixgbe_allocate_queues(struct adapter *);
static int      ixgbe_setup_msix(struct adapter *);
static void     ixgbe_free_pci_resources(struct adapter *);
static void     ixgbe_local_timer(void *);
static int      ixgbe_setup_interface(device_t, struct adapter *);
static void     ixgbe_config_link(struct adapter *);

static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int      ixgbe_setup_transmit_structures(struct adapter *);
static void     ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int      ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
static void     ixgbe_setup_hw_rsc(struct rx_ring *);

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void     ixgbe_txeof(struct tx_ring *);
static bool     ixgbe_rxeof(struct ix_queue *);
static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
                    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static int      ixgbe_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      ixgbe_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void     ixgbe_configure_ivars(struct adapter *);
static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void     ixgbe_setup_vlan_hw_support(struct adapter *);
static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                    struct mbuf *, u32);

static void     ixgbe_enable_rx_drop(struct adapter *);
static void     ixgbe_disable_rx_drop(struct adapter *);

/* Support for pluggable optic modules */
static bool     ixgbe_sfp_probe(struct adapter *);
static void     ixgbe_setup_optics(struct adapter *);
/* Legacy (single vector) interrupt handler */
static void     ixgbe_legacy_irq(void *);

/* The MSI/X Interrupt handlers */
static void     ixgbe_msix_que(void *);
static void     ixgbe_msix_link(void *);

/* Deferred interrupt tasklets */
static void     ixgbe_handle_que(void *, int);
static void     ixgbe_handle_link(void *, int);
static void     ixgbe_handle_msf(void *, int);
static void     ixgbe_handle_mod(void *, int);

#ifdef IXGBE_FDIR
static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
static void     ixgbe_reinit_fdir(void *, int);
#endif

/* Missing shared code prototype */
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ixgbe_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, ixgbe_probe),
        DEVMETHOD(device_attach, ixgbe_attach),
        DEVMETHOD(device_detach, ixgbe_detach),
        DEVMETHOD(device_shutdown, ixgbe_shutdown),
        DEVMETHOD_END
};

static driver_t ixgbe_driver = {
        "ix", ixgbe_methods, sizeof(struct adapter),
};

devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);

/*
** TUNEABLE PARAMETERS:
*/
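/*
** All TUNABLE_INT() entries below are loader tunables.  As an
** illustrative example only (the values are not recommendations),
** they can be set from /boot/loader.conf before the module loads:
**
**   hw.ixgbe.enable_msix=1
**   hw.ixgbe.num_queues=4
**   hw.ixgbe.rxd=2048
*/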

/*
** AIM: Adaptive Interrupt Moderation;
** the interrupt rate is varied over
** time based on the traffic seen by
** that interrupt vector.
*/
static int ixgbe_enable_aim = TRUE;
TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);

static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
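/*
** Note: this rate is expressed in interrupts per second; elsewhere in
** the driver it is converted to an EITR interval of roughly
** 4000000 / rate, which is where the 4000000 constant in the default
** above comes from.
*/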

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);

/*
** Smart speed setting, default to on.
** This currently only works as a
** compile-time option because it's
** used during attach; set this to
** 'ixgbe_smart_speed_off' to disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);

/*
 * Number of queues; if set to 0 it
 * autoconfigures based on the number
 * of cpus, with a max of 8. It can
 * also be overridden manually here.
 */
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);

/*
** Number of TX descriptors per ring,
** set higher than RX as this seems
** the better performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);

/*
** Turning this on allows the use
** of unsupported SFP+ modules; note
** that in doing so you are on your own :)
*/
static int allow_unsupported_sfp = FALSE;
TUNABLE_INT("hw.ixgbe.unsupported_sfp", &allow_unsupported_sfp);
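/*
** This value is copied into hw->allow_unsupported_sfp during
** ixgbe_attach() for the shared code to consult.
*/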

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  It also causes IP forwarding to
**  fail, and unlike LRO that cannot
**  be controlled by the stack. For
**  all these reasons it is best left
**  off, and no tuneable interface is
**  provided; enabling it requires
**  recompiling with this set.
*/
static bool ixgbe_rsc_enable = FALSE;

/* Keep a running tab on them for sanity check */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; with the
** default of 20, every 20th packet
** will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool; this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif

#ifdef DEV_NETMAP
/*
 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
 * be a reference on how to implement netmap support in a driver.
 * Additional comments are in ixgbe_netmap.h .
 *
 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
 * that extend the standard driver.
 */
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
ixgbe_probe(device_t dev)
{
        ixgbe_vendor_info_t *ent;

        u16     pci_vendor_id = 0;
        u16     pci_device_id = 0;
        u16     pci_subvendor_id = 0;
        u16     pci_subdevice_id = 0;
        char    adapter_name[256];

        INIT_DEBUGOUT("ixgbe_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = ixgbe_vendor_info_array;
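        /*
         * A subvendor/subdevice ID of 0 in the table acts as a
         * wildcard, which is why the entries above leave those
         * fields zeroed.
         */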
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                     (ent->subvendor_id == 0)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                     (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                ixgbe_strings[ent->index],
                                ixgbe_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        ++ixgbe_total_ports;
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_attach(device_t dev)
{
        struct adapter *adapter;
        struct ixgbe_hw *hw;
        int             error = 0;
        u16             csum;
        u32             ctrl_ext;

        INIT_DEBUGOUT("ixgbe_attach: begin");

        /* Allocate, clear, and link in our adapter structure */
        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;

        /* Core Lock Init */
        IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL APIs */

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
                        &ixgbe_enable_aim, 1, "Interrupt Moderation");

        /*
        ** Allow a kind of speed control by forcing the autoneg
        ** advertised speed list to only a certain value; this
        ** supports 1G on 82599 devices and 100Mb on X540.
        */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_advertise, "I", "Link Speed");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
                        0, ixgbe_set_thermal_test, "I", "Thermal Test");

        /* Set up the timer callout */
        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware revision */
        ixgbe_identify_hardware(adapter);

        /* Do base PCI setup - map BAR0 */
        if (ixgbe_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_out;
        }

        /* Do descriptor calc and sanity checks */
        if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
                device_printf(dev, "TXD config issue, using default!\n");
                adapter->num_tx_desc = DEFAULT_TXD;
        } else
                adapter->num_tx_desc = ixgbe_txd;

        /*
        ** With many RX rings it is easy to exceed the
        ** system mbuf allocation. Tuning nmbclusters
        ** can alleviate this.
        */
        if (nmbclusters > 0) {
                int s;
                s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
                if (s > nmbclusters) {
                        device_printf(dev, "RX Descriptors exceed "
                            "system mbuf max, using default instead!\n");
                        ixgbe_rxd = DEFAULT_RXD;
                }
        }
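        /*
         * Illustrative arithmetic for the check above: two ports with
         * 8 queues of 2048 RX descriptors each would want
         * 2048 * 8 * 2 = 32768 clusters; if that exceeds nmbclusters
         * the default descriptor count is used instead.
         */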

        if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
                device_printf(dev, "RXD config issue, using default!\n");
                adapter->num_rx_desc = DEFAULT_RXD;
        } else
                adapter->num_rx_desc = ixgbe_rxd;

        /* Allocate our TX/RX Queues */
        if (ixgbe_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_out;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Initialize the shared code */
        hw->allow_unsupported_sfp = allow_unsupported_sfp;
        error = ixgbe_init_shared_code(hw);
        if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
                /*
                ** No optics in this port, set up
                ** so the timer routine will probe
                ** for later insertion.
                */
                adapter->sfp_probe = TRUE;
                error = 0;
        } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                device_printf(dev, "Unsupported SFP+ module detected!\n");
                error = EIO;
                goto err_late;
        } else if (error) {
                device_printf(dev, "Unable to initialize the shared code\n");
                error = EIO;
                goto err_late;
        }

        /* Make sure we have a good EEPROM before we read from it */
        if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
                device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
                error = EIO;
                goto err_late;
        }

        error = ixgbe_init_hw(hw);
        switch (error) {
        case IXGBE_ERR_EEPROM_VERSION:
                device_printf(dev, "This device is a pre-production adapter/"
                    "LOM.  Please be aware there may be issues associated "
                    "with your hardware.\n If you are experiencing problems "
                    "please contact your Intel or hardware representative "
                    "who provided you with this hardware.\n");
                break;
        case IXGBE_ERR_SFP_NOT_SUPPORTED:
                device_printf(dev, "Unsupported SFP+ Module\n");
                error = EIO;
                goto err_late;
        case IXGBE_ERR_SFP_NOT_PRESENT:
                device_printf(dev, "No SFP+ Module found\n");
                /* falls thru */
        default:
                break;
        }

        /* Detect and set physical type */
        ixgbe_setup_optics(adapter);

        if ((adapter->msix > 1) && (ixgbe_enable_msix))
                error = ixgbe_allocate_msix(adapter);
        else
                error = ixgbe_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        if (ixgbe_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Initialize statistics */
        ixgbe_update_stats_counters(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /*
        ** Check PCIE slot type/speed/width
        */
        ixgbe_get_slot_info(hw);

        /* Set an initial default flow control value */
        adapter->fc = ixgbe_fc_full;

        /* let hardware know driver is loaded */
        ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
        ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

        ixgbe_add_hw_stats(adapter);

#ifdef DEV_NETMAP
        ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("ixgbe_attach: end");
        return (0);
err_late:
        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
err_out:
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        ixgbe_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        return (error);

}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_detach(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ix_queue *que = adapter->queues;
        struct tx_ring *txr = adapter->tx_rings;
        u32     ctrl_ext;

        INIT_DEBUGOUT("ixgbe_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);

        for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
                if (que->tq) {
#ifndef IXGBE_LEGACY_TX
                        taskqueue_drain(que->tq, &txr->txq_task);
#endif
                        taskqueue_drain(que->tq, &que->que_task);
                        taskqueue_free(que->tq);
                }
        }

        /* Drain the Link queue */
        if (adapter->tq) {
                taskqueue_drain(adapter->tq, &adapter->link_task);
                taskqueue_drain(adapter->tq, &adapter->mod_task);
                taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
                taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
                taskqueue_free(adapter->tq);
        }

        /* let hardware know driver is unloading */
        ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
        ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);
#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        ixgbe_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(adapter->ifp);

        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
        free(adapter->mta, M_DEVBUF);

        IXGBE_CORE_LOCK_DESTROY(adapter);
        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
ixgbe_shutdown(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return (0);
}


#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct mbuf    *m_head;
        struct adapter *adapter = txr->adapter;

        IXGBE_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                return;
        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
                        break;

                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;

                if (ixgbe_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status = IXGBE_QUEUE_WORKING;

        }
        return;
}

/*
 * Legacy TX start - called by the stack; this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IXGBE_TX_LOCK(txr);
                ixgbe_start_locked(txr, ifp);
                IXGBE_TX_UNLOCK(txr);
        }
        return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
**
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ix_queue *que;
        struct tx_ring  *txr;
        int             i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;
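        /*
         * A flow-tagged mbuf (e.g. one carrying an RSS hash recorded
         * on receive) always maps to the same ring, preserving
         * per-flow ordering; untagged traffic simply uses the current
         * CPU's ring.
         */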

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IXGBE_TX_TRYLOCK(txr)) {
                err = ixgbe_mq_start_locked(ifp, txr);
                IXGBE_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             enqueued = 0, err = 0;

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
#if __FreeBSD_version < 901504
        next = drbr_dequeue(ifp, txr->br);
        while (next != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
#else
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                drbr_advance(ifp, txr->br);
                        } else {
                                drbr_putback(ifp, txr->br, next);
                        }
#endif
                        break;
                }
#if __FreeBSD_version >= 901504
                drbr_advance(ifp, txr->br);
#endif
                enqueued++;
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
#if __FreeBSD_version < 901504
                next = drbr_dequeue(ifp, txr->br);
#endif
        }

        if (enqueued > 0) {
                /* Set watchdog on */
                txr->queue_status = IXGBE_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
                ixgbe_txeof(txr);

        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
ixgbe_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IXGBE_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                ixgbe_mq_start_locked(ifp, txr);
        IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IXGBE_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IXGBE_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ixgbe_hw *hw = &adapter->hw;
        struct ifreq    *ifr = (struct ifreq *) data;
#if defined(INET) || defined(INET6)
        struct ifaddr *ifa = (struct ifaddr *)data;
        bool            avoid_reset = FALSE;
#endif
        int             error = 0;

        switch (command) {

        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
#if defined(INET) || defined(INET6)
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                ixgbe_init(adapter);
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
                        error = ether_ioctl(ifp, command, data);
#endif
                break;
        case SIOCSIFMTU:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
                if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
                        error = EINVAL;
                } else {
                        IXGBE_CORE_LOCK(adapter);
                        ifp->if_mtu = ifr->ifr_mtu;
                        adapter->max_frame_size =
                                ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
                IXGBE_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        ixgbe_set_promisc(adapter);
                                }
                        } else
                                ixgbe_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                ixgbe_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IXGBE_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_disable_intr(adapter);
                        ixgbe_set_multi(adapter);
                        ixgbe_enable_intr(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
        {
                int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
                if (mask & IFCAP_HWCSUM)
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                if (mask & IFCAP_TSO4)
                        ifp->if_capenable ^= IFCAP_TSO4;
                if (mask & IFCAP_TSO6)
                        ifp->if_capenable ^= IFCAP_TSO6;
                if (mask & IFCAP_LRO)
                        ifp->if_capenable ^= IFCAP_LRO;
                if (mask & IFCAP_VLAN_HWTAGGING)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                if (mask & IFCAP_VLAN_HWFILTER)
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                if (mask & IFCAP_VLAN_HWTSO)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                VLAN_CAPABILITIES(ifp);
                break;
        }
        case SIOCGI2C:
        {
                struct ixgbe_i2c_req    i2c;
                IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
                error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
                if (error)
                        break;
                if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
                        error = EINVAL;
                        break;
                }
                hw->phy.ops.read_i2c_byte(hw, i2c.offset,
                    i2c.dev_addr, i2c.data);
                error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
                break;
        }
        default:
                IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
#define IXGBE_MHADD_MFS_SHIFT 16

static void
ixgbe_init_locked(struct adapter *adapter)
{
        struct ifnet   *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        struct ixgbe_hw *hw = &adapter->hw;
        u32             k, txdctl, mhadd, gpie;
        u32             rxdctl, rxctrl;

        mtx_assert(&adapter->core_mtx, MA_OWNED);
        INIT_DEBUGOUT("ixgbe_init_locked: begin");
        hw->adapter_stopped = FALSE;
        ixgbe_stop_adapter(hw);
        callout_stop(&adapter->timer);

        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

        /* Get the latest mac address, user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
              IXGBE_ETH_LENGTH_OF_ADDRESS);
        ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
        hw->addr_ctrl.rar_used_count = 1;

        /* Set the various hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (hw->mac.type != ixgbe_mac_82598EB)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        /* Prepare transmit descriptors and buffers */
        if (ixgbe_setup_transmit_structures(adapter)) {
                device_printf(dev, "Could not setup transmit structures\n");
                ixgbe_stop(adapter);
                return;
        }

        ixgbe_init_hw(hw);
        ixgbe_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        ixgbe_set_multi(adapter);

        /*
        ** Determine the correct mbuf pool
        ** for doing jumbo frames
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else if (adapter->max_frame_size <= 9216)
                adapter->rx_mbuf_sz = MJUM9BYTES;
        else
                adapter->rx_mbuf_sz = MJUM16BYTES;
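        /*
         * Worked example: an MTU of 9000 gives a max_frame_size of
         * 9000 + 14 + 4 = 9018 bytes (MTU plus Ethernet header and
         * CRC), which fits the 9216-byte pool, so MJUM9BYTES
         * clusters are selected.
         */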

        /* Prepare receive descriptors and buffers */
        if (ixgbe_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                ixgbe_stop(adapter);
                return;
        }

        /* Configure RX settings */
        ixgbe_initialize_receive_units(adapter);

        gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

        /* Enable Fan Failure Interrupt */
        gpie |= IXGBE_SDP1_GPIEN;

        /* Add for Module detection */
        if (hw->mac.type == ixgbe_mac_82599EB)
                gpie |= IXGBE_SDP2_GPIEN;

        /* Thermal Failure Detection */
        if (hw->mac.type == ixgbe_mac_X540)
                gpie |= IXGBE_SDP0_GPIEN;

        if (adapter->msix > 1) {
                /* Enable Enhanced MSIX mode */
                gpie |= IXGBE_GPIE_MSIX_MODE;
                gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
                    IXGBE_GPIE_OCD;
        }
        IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

        /* Set MTU size */
        if (ifp->if_mtu > ETHERMTU) {
                mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
                mhadd &= ~IXGBE_MHADD_MFS_MASK;
                mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
                IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
        }

        /* Now enable all the queues */

        for (int i = 0; i < adapter->num_queues; i++) {
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                /* Set WTHRESH to 8, burst writeback */
                txdctl |= (8 << 16);
                /*
                 * When the internal queue falls below PTHRESH (32),
                 * start prefetching as long as there are at least
                 * HTHRESH (1) buffers ready. The values are taken
                 * from the Intel linux driver 3.8.21.
                 * Prefetching enables tx line rate even with 1 queue.
                 */
                txdctl |= (32 << 0) | (1 << 8);
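                /*
                 * TXDCTL threshold layout for reference: PTHRESH
                 * lives in bits 6:0, HTHRESH in bits 14:8, and
                 * WTHRESH in bits 22:16, hence the shifts above.
                 */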
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
        }

        for (int i = 0; i < adapter->num_queues; i++) {
                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
                if (hw->mac.type == ixgbe_mac_82598EB) {
                        /*
                        ** PTHRESH = 21
                        ** HTHRESH = 4
                        ** WTHRESH = 8
                        */
                        rxdctl &= ~0x3FFFFF;
                        rxdctl |= 0x080420;
                }
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
                for (k = 0; k < 10; k++) {
                        if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
                            IXGBE_RXDCTL_ENABLE)
                                break;
                        else
                                msec_delay(1);
                }
                wmb();
#ifdef DEV_NETMAP
                /*
                 * In netmap mode, we must preserve the buffers made
                 * available to userspace before the if_init()
                 * (this is true by default on the TX side, because
                 * init makes all buffers available to userspace).
                 *
                 * netmap_reset() and the device specific routines
                 * (e.g. ixgbe_setup_receive_rings()) map these
                 * buffers at the end of the NIC ring, so here we
                 * must set the RDT (tail) register to make sure
                 * they are not overwritten.
                 *
                 * In this driver the NIC ring starts at RDH = 0,
                 * RDT points to the last slot available for reception (?),
                 * so RDT = num_rx_desc - 1 means the whole ring is available.
                 */
                if (ifp->if_capenable & IFCAP_NETMAP) {
                        struct netmap_adapter *na = NA(adapter->ifp);
                        struct netmap_kring *kring = &na->rx_rings[i];
                        int t = na->num_rx_desc - 1 - kring->nr_hwavail;

                        IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
                } else
#endif /* DEV_NETMAP */
                IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
        }

        /* Set up VLAN support and filter */
        ixgbe_setup_vlan_hw_support(adapter);

        /* Enable Receive engine */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
        if (hw->mac.type == ixgbe_mac_82598EB)
                rxctrl |= IXGBE_RXCTRL_DMBYPS;
        rxctrl |= IXGBE_RXCTRL_RXEN;
        ixgbe_enable_rx_dma(hw, rxctrl);

        callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

        /* Set up MSI/X routing */
        if (ixgbe_enable_msix) {
                ixgbe_configure_ivars(adapter);
                /* Set up auto-mask */
                if (hw->mac.type == ixgbe_mac_82598EB)
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
                else {
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
                }
        } else {  /* Simple settings for Legacy/MSI */
                ixgbe_set_ivar(adapter, 0, 0, 0);
                ixgbe_set_ivar(adapter, 0, 0, 1);
                IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
        }

#ifdef IXGBE_FDIR
        /* Init Flow director */
        if (hw->mac.type != ixgbe_mac_82598EB) {
                u32 hdrm = 32 << fdir_pballoc;

                hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
                ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
        }
#endif

        /*
        ** Check on any SFP devices that
        ** need to be kick-started
        */
        if (hw->phy.type == ixgbe_phy_none) {
                int err = hw->phy.ops.identify(hw);
                if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                        device_printf(dev,
                            "Unsupported SFP+ module type was detected.\n");
                        return;
                }
        }

        /* Set moderation on the Link interrupt */
        IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);

        /* Config/Enable Link */
        ixgbe_config_link(adapter);

        /* Hardware Packet Buffer & Flow Control setup */
        {
                u32 rxpb, frame, size, tmp;

                frame = adapter->max_frame_size;

                /* Calculate High Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_DV_X540(frame, frame);
                else
                        tmp = IXGBE_DV(frame, frame);
                size = IXGBE_BT2KB(tmp);
                rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
                hw->fc.high_water[0] = rxpb - size;

                /* Now calculate Low Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_LOW_DV_X540(frame);
                else
                        tmp = IXGBE_LOW_DV(frame);
                hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

                hw->fc.requested_mode = adapter->fc;
                hw->fc.pause_time = IXGBE_FC_PAUSE;
                hw->fc.send_xon = TRUE;
        }
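        /*
         * Units note: the IXGBE_DV()/IXGBE_LOW_DV() delay values are
         * in bytes; IXGBE_BT2KB() converts them to kilobytes, and the
         * ">> 10" above converts the RXPBSIZE packet buffer size to
         * kilobytes as well, so the high/low water marks are in KB.
         */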
        /* Initialize the FC settings */
        ixgbe_start_hw(hw);

        /* And now turn on interrupts */
        ixgbe_enable_intr(adapter);

        /* Now inform the stack we're ready */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;

        return;
}

static void
ixgbe_init(void *arg)
{
        struct adapter *adapter = arg;

        IXGBE_CORE_LOCK(adapter);
        ixgbe_init_locked(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return;
}


/*
**
** MSIX Interrupt Handlers and Tasklets
**
*/

static inline void
ixgbe_enable_queue(struct adapter *adapter, u32 vector)
{
        struct ixgbe_hw *hw = &adapter->hw;
        u64     queue = ((u64)1 << vector);
        u32     mask;

        if (hw->mac.type == ixgbe_mac_82598EB) {
                mask = (IXGBE_EIMS_RTX_QUEUE & queue);
                IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
        } else {
                mask = (queue & 0xFFFFFFFF);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
                mask = (queue >> 32);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
        }
}
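/*
** On 82598 a single EIMS register covers every vector; on later MACs
** the vector mask can exceed 32 bits, so the 64-bit queue mask above
** is written out in two halves via EIMS_EX(0)/EIMS_EX(1). The same
** split applies to EIMC_EX in ixgbe_disable_queue() below.
*/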

static inline void
ixgbe_disable_queue(struct adapter *adapter, u32 vector)
{
        struct ixgbe_hw *hw = &adapter->hw;
        u64     queue = ((u64)1 << vector);
1371         u32     mask;
1372
1373         if (hw->mac.type == ixgbe_mac_82598EB) {
1374                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1375                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1376         } else {
1377                 mask = (queue & 0xFFFFFFFF);
1378                 if (mask)
1379                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1380                 mask = (queue >> 32);
1381                 if (mask)
1382                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1383         }
1384 }
1385
1386 static void
1387 ixgbe_handle_que(void *context, int pending)
1388 {
1389         struct ix_queue *que = context;
1390         struct adapter  *adapter = que->adapter;
1391         struct tx_ring  *txr = que->txr;
1392         struct ifnet    *ifp = adapter->ifp;
1393         bool            more;
1394
1395         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1396                 more = ixgbe_rxeof(que);
1397                 IXGBE_TX_LOCK(txr);
1398                 ixgbe_txeof(txr);
1399 #ifndef IXGBE_LEGACY_TX
1400                 if (!drbr_empty(ifp, txr->br))
1401                         ixgbe_mq_start_locked(ifp, txr);
1402 #else
1403                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1404                         ixgbe_start_locked(txr, ifp);
1405 #endif
1406                 IXGBE_TX_UNLOCK(txr);
1407         }
1408
1409         /* Re-enable this interrupt: per-queue for MSIX, global for legacy/MSI */
1410         if (que->res != NULL)
1411                 ixgbe_enable_queue(adapter, que->msix);
1412         else
1413                 ixgbe_enable_intr(adapter);
1414         return;
1415 }
1416
1417
1418 /*********************************************************************
1419  *
1420  *  Legacy Interrupt Service routine
1421  *
1422  **********************************************************************/
1423
1424 static void
1425 ixgbe_legacy_irq(void *arg)
1426 {
1427         struct ix_queue *que = arg;
1428         struct adapter  *adapter = que->adapter;
1429         struct ixgbe_hw *hw = &adapter->hw;
1430         struct ifnet    *ifp = adapter->ifp;
1431         struct          tx_ring *txr = adapter->tx_rings;
1432         bool            more;
1433         u32             reg_eicr;
1434
1435
1436         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1437
1438         ++que->irqs;
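             /* A zero cause value means the interrupt was spurious or not ours */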
1439         if (reg_eicr == 0) {
1440                 ixgbe_enable_intr(adapter);
1441                 return;
1442         }
1443
1444         more = ixgbe_rxeof(que);
1445
1446         IXGBE_TX_LOCK(txr);
1447         ixgbe_txeof(txr);
1448 #ifdef IXGBE_LEGACY_TX
1449         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1450                 ixgbe_start_locked(txr, ifp);
1451 #else
1452         if (!drbr_empty(ifp, txr->br))
1453                 ixgbe_mq_start_locked(ifp, txr);
1454 #endif
1455         IXGBE_TX_UNLOCK(txr);
1456
1457         /* Check for fan failure */
1458         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1459             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1460                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1461                     "REPLACE IMMEDIATELY!!\n");
1462                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1463         }
1464
1465         /* Link status change */
1466         if (reg_eicr & IXGBE_EICR_LSC)
1467                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1468
1469         if (more)
1470                 taskqueue_enqueue(que->tq, &que->que_task);
1471         else
1472                 ixgbe_enable_intr(adapter);
1473         return;
1474 }
1475
1476
1477 /*********************************************************************
1478  *
1479  *  MSIX Queue Interrupt Service routine
1480  *
1481  **********************************************************************/
1482 void
1483 ixgbe_msix_que(void *arg)
1484 {
1485         struct ix_queue *que = arg;
1486         struct adapter  *adapter = que->adapter;
1487         struct ifnet    *ifp = adapter->ifp;
1488         struct tx_ring  *txr = que->txr;
1489         struct rx_ring  *rxr = que->rxr;
1490         bool            more;
1491         u32             newitr = 0;
1492
1493         /* Protect against spurious interrupts */
1494         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1495                 return;
1496
1497         ixgbe_disable_queue(adapter, que->msix);
1498         ++que->irqs;
1499
1500         more = ixgbe_rxeof(que);
1501
1502         IXGBE_TX_LOCK(txr);
1503         ixgbe_txeof(txr);
1504 #ifdef IXGBE_LEGACY_TX
1505         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1506                 ixgbe_start_locked(txr, ifp);
1507 #else
1508         if (!drbr_empty(ifp, txr->br))
1509                 ixgbe_mq_start_locked(ifp, txr);
1510 #endif
1511         IXGBE_TX_UNLOCK(txr);
1512
1513         /* Do AIM now? */
1514
1515         if (ixgbe_enable_aim == FALSE)
1516                 goto no_calc;
1517         /*
1518         ** Do Adaptive Interrupt Moderation:
1519         **  - Write out last calculated setting
1520         **  - Calculate based on average size over
1521         **    the last interval.
1522         */
1523         if (que->eitr_setting)
1524                 IXGBE_WRITE_REG(&adapter->hw,
1525                     IXGBE_EITR(que->msix), que->eitr_setting);
1526  
1527         que->eitr_setting = 0;
1528
1529         /* Idle, do nothing */
1530         if ((txr->bytes == 0) && (rxr->bytes == 0))
1531                 goto no_calc;
1532                                 
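             /*
             ** Use the average bytes per packet seen over the last
             ** interval as the moderation heuristic: larger frames
             ** can tolerate a longer interrupt interval.
             */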
1533         if ((txr->bytes) && (txr->packets))
1534                 newitr = txr->bytes/txr->packets;
1535         if ((rxr->bytes) && (rxr->packets))
1536                 newitr = max(newitr,
1537                     (rxr->bytes / rxr->packets));
1538         newitr += 24; /* account for hardware frame, crc */
1539
1540         /* set an upper boundary */
1541         newitr = min(newitr, 3000);
1542
1543         /* Be nice to the mid range */
1544         if ((newitr > 300) && (newitr < 1200))
1545                 newitr = (newitr / 3);
1546         else
1547                 newitr = (newitr / 2);
1548
1549         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1550                 newitr |= newitr << 16;
1551         else
1552                 newitr |= IXGBE_EITR_CNT_WDIS;
1553                  
1554         /* save for next interrupt */
1555         que->eitr_setting = newitr;
1556
1557         /* Reset state */
1558         txr->bytes = 0;
1559         txr->packets = 0;
1560         rxr->bytes = 0;
1561         rxr->packets = 0;
1562
1563 no_calc:
1564         if (more)
1565                 taskqueue_enqueue(que->tq, &que->que_task);
1566         else
1567                 ixgbe_enable_queue(adapter, que->msix);
1568         return;
1569 }
1570
1571
1572 static void
1573 ixgbe_msix_link(void *arg)
1574 {
1575         struct adapter  *adapter = arg;
1576         struct ixgbe_hw *hw = &adapter->hw;
1577         u32             reg_eicr;
1578
1579         ++adapter->link_irq;
1580
1581         /* First get the cause */
1582         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1583         /* Be sure the queue bits are not cleared */
1584         reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1585         /* Clear interrupt with write */
1586         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1587
1588         /* Link status change */
1589         if (reg_eicr & IXGBE_EICR_LSC)
1590                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1591
1592         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1593 #ifdef IXGBE_FDIR
1594                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1595                         /* This is probably overkill :) */
1596                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1597                                 return;
1598                         /* Disable the interrupt */
1599                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1600                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1601                 } else
1602 #endif
1603                 if (reg_eicr & IXGBE_EICR_ECC) {
1604                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1605                             "Please Reboot!!\n");
1606                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1607                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1610                         /* Clear the interrupt */
1611                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1612                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1613                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1614                         /* Clear the interrupt */
1615                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1616                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1617                 }
1618         } 
1619
1620         /* Check for fan failure */
1621         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1622             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1623                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1624                     "REPLACE IMMEDIATELY!!\n");
1625                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1626         }
1627
1628         /* Check for over temp condition */
1629         if ((hw->mac.type == ixgbe_mac_X540) &&
1630             (reg_eicr & IXGBE_EICR_TS)) {
1631                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1632                     "PHY IS SHUT DOWN!!\n");
1633                 device_printf(adapter->dev, "System shutdown required\n");
1634                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1635         }
1636
1637         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1638         return;
1639 }
1640
1641 /*********************************************************************
1642  *
1643  *  Media Ioctl callback
1644  *
1645  *  This routine is called whenever the user queries the status of
1646  *  the interface using ifconfig.
1647  *
1648  **********************************************************************/
1649 static void
1650 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1651 {
1652         struct adapter *adapter = ifp->if_softc;
1653
1654         INIT_DEBUGOUT("ixgbe_media_status: begin");
1655         IXGBE_CORE_LOCK(adapter);
1656         ixgbe_update_link_status(adapter);
1657
1658         ifmr->ifm_status = IFM_AVALID;
1659         ifmr->ifm_active = IFM_ETHER;
1660
1661         if (!adapter->link_active) {
1662                 IXGBE_CORE_UNLOCK(adapter);
1663                 return;
1664         }
1665
1666         ifmr->ifm_status |= IFM_ACTIVE;
1667
1668         switch (adapter->link_speed) {
1669                 case IXGBE_LINK_SPEED_100_FULL:
1670                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1671                         break;
1672                 case IXGBE_LINK_SPEED_1GB_FULL:
1673                         ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1674                         break;
1675                 case IXGBE_LINK_SPEED_10GB_FULL:
1676                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1677                         break;
1678         }
1679
1680         IXGBE_CORE_UNLOCK(adapter);
1681
1682         return;
1683 }
1684
1685 /*********************************************************************
1686  *
1687  *  Media Ioctl callback
1688  *
1689  *  This routine is called when the user changes speed/duplex using
1690  *  media/mediaopt option with ifconfig.
1691  *
1692  **********************************************************************/
1693 static int
1694 ixgbe_media_change(struct ifnet * ifp)
1695 {
1696         struct adapter *adapter = ifp->if_softc;
1697         struct ifmedia *ifm = &adapter->media;
1698
1699         INIT_DEBUGOUT("ixgbe_media_change: begin");
1700
1701         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1702                 return (EINVAL);
1703
1704         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1705         case IFM_AUTO:
1706                 adapter->hw.phy.autoneg_advertised =
1707                     IXGBE_LINK_SPEED_100_FULL |
1708                     IXGBE_LINK_SPEED_1GB_FULL |
1709                     IXGBE_LINK_SPEED_10GB_FULL;
1710                 break;
1711         default:
1712                 device_printf(adapter->dev, "Only auto media type\n");
1713                 return (EINVAL);
1714         }
1715
1716         return (0);
1717 }
1718
1719 /*********************************************************************
1720  *
1721  *  This routine maps the mbufs to tx descriptors, allowing the
1722  *  TX engine to transmit the packets. 
1723  *      - return 0 on success, positive on failure
1724  *
1725  **********************************************************************/
1726
1727 static int
1728 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1729 {
1730         struct adapter  *adapter = txr->adapter;
1731         u32             olinfo_status = 0, cmd_type_len;
1732         int             i, j, error, nsegs;
1733         int             first;
1734         bool            remap = TRUE;
1735         struct mbuf     *m_head;
1736         bus_dma_segment_t segs[adapter->num_segs];
1737         bus_dmamap_t    map;
1738         struct ixgbe_tx_buf *txbuf;
1739         union ixgbe_adv_tx_desc *txd = NULL;
1740
1741         m_head = *m_headp;
1742
1743         /* Basic descriptor defines */
1744         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1745             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1746
1747         if (m_head->m_flags & M_VLANTAG)
1748                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1749
1750         /*
1751          * Important to capture the first descriptor
1752          * used because it will contain the index of
1753          * the one we tell the hardware to report back
1754          */
1755         first = txr->next_avail_desc;
1756         txbuf = &txr->tx_buffers[first];
1757         map = txbuf->map;
1758
1759         /*
1760          * Map the packet for DMA.
1761          */
1762 retry:
1763         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1764             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1765
1766         if (__predict_false(error)) {
1767                 struct mbuf *m;
1768
1769                 switch (error) {
1770                 case EFBIG:
1771                         /* Try it again? - one try */
1772                         if (remap == TRUE) {
1773                                 remap = FALSE;
1774                                 m = m_defrag(*m_headp, M_NOWAIT);
1775                                 if (m == NULL) {
1776                                         adapter->mbuf_defrag_failed++;
1777                                         m_freem(*m_headp);
1778                                         *m_headp = NULL;
1779                                         return (ENOBUFS);
1780                                 }
1781                                 *m_headp = m;
1782                                 goto retry;
1783                         } else
1784                                 return (error);
1785                 case ENOMEM:
1786                         txr->no_tx_dma_setup++;
1787                         return (error);
1788                 default:
1789                         txr->no_tx_dma_setup++;
1790                         m_freem(*m_headp);
1791                         *m_headp = NULL;
1792                         return (error);
1793                 }
1794         }
1795
1796         /* Make certain there are enough descriptors; the context setup below may use one more */
1797         if (nsegs > txr->tx_avail - 2) {
1798                 txr->no_desc_avail++;
1799                 bus_dmamap_unload(txr->txtag, map);
1800                 return (ENOBUFS);
1801         }
1802         m_head = *m_headp;
1803
1804         /*
1805         ** Set up the appropriate offload context
1806         ** this will consume the first descriptor
1807         */
1808         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1809         if (__predict_false(error)) {
1810                 if (error == ENOBUFS)
1811                         *m_headp = NULL;
1812                 return (error);
1813         }
1814
1815 #ifdef IXGBE_FDIR
1816         /* Do the flow director magic */
1817         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1818                 ++txr->atr_count;
1819                 if (txr->atr_count >= atr_sample_rate) {
1820                         ixgbe_atr(txr, m_head);
1821                         txr->atr_count = 0;
1822                 }
1823         }
1824 #endif
1825
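             /*
             ** Fill in one advanced data descriptor per DMA segment;
             ** the final one is tagged with EOP/RS below so the
             ** hardware reports completion on it.
             */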
1826         i = txr->next_avail_desc;
1827         for (j = 0; j < nsegs; j++) {
1828                 bus_size_t seglen;
1829                 bus_addr_t segaddr;
1830
1831                 txbuf = &txr->tx_buffers[i];
1832                 txd = &txr->tx_base[i];
1833                 seglen = segs[j].ds_len;
1834                 segaddr = htole64(segs[j].ds_addr);
1835
1836                 txd->read.buffer_addr = segaddr;
1837                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1838                     cmd_type_len | seglen);
1839                 txd->read.olinfo_status = htole32(olinfo_status);
1840
1841                 if (++i == txr->num_desc)
1842                         i = 0;
1843         }
1844
1845         txd->read.cmd_type_len |=
1846             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1847         txr->tx_avail -= nsegs;
1848         txr->next_avail_desc = i;
1849
1850         txbuf->m_head = m_head;
1851         /*
1852         ** Here we swap the map so the last descriptor,
1853         ** which gets the completion interrupt has the
1854         ** real map, and the first descriptor gets the
1855         ** unused map from this descriptor.
1856         */
1857         txr->tx_buffers[first].map = txbuf->map;
1858         txbuf->map = map;
1859         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1860
1861         /* Set the EOP descriptor that will be marked done */
1862         txbuf = &txr->tx_buffers[first];
1863         txbuf->eop = txd;
1864
1865         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1866             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1867         /*
1868          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1869          * hardware that this frame is available to transmit.
1870          */
1871         ++txr->total_packets;
1872         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1873
1874         return (0);
1875
1876 }
1877
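     /*
     ** Update the unicast (UPE) and multicast (MPE) promiscuous
     ** bits in FCTRL to match the current interface flags.
     */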
1878 static void
1879 ixgbe_set_promisc(struct adapter *adapter)
1880 {
1881         u_int32_t       reg_rctl;
1882         struct ifnet   *ifp = adapter->ifp;
1883         int             mcnt = 0;
1884
1885         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1886         reg_rctl &= (~IXGBE_FCTRL_UPE);
1887         if (ifp->if_flags & IFF_ALLMULTI)
1888                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1889         else {
1890                 struct  ifmultiaddr *ifma;
1891 #if __FreeBSD_version < 800000
1892                 IF_ADDR_LOCK(ifp);
1893 #else
1894                 if_maddr_rlock(ifp);
1895 #endif
1896                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1897                         if (ifma->ifma_addr->sa_family != AF_LINK)
1898                                 continue;
1899                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1900                                 break;
1901                         mcnt++;
1902                 }
1903 #if __FreeBSD_version < 800000
1904                 IF_ADDR_UNLOCK(ifp);
1905 #else
1906                 if_maddr_runlock(ifp);
1907 #endif
1908         }
1909         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1910                 reg_rctl &= (~IXGBE_FCTRL_MPE);
1911         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1912
1913         if (ifp->if_flags & IFF_PROMISC) {
1914                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1915                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1916         } else if (ifp->if_flags & IFF_ALLMULTI) {
1917                 reg_rctl |= IXGBE_FCTRL_MPE;
1918                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1919                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1920         }
1921         return;
1922 }
1923
1924
1925 /*********************************************************************
1926  *  Multicast Update
1927  *
1928  *  This routine is called whenever multicast address list is updated.
1929  *
1930  **********************************************************************/
1931 #define IXGBE_RAR_ENTRIES 16
1932
1933 static void
1934 ixgbe_set_multi(struct adapter *adapter)
1935 {
1936         u32     fctrl;
1937         u8      *mta;
1938         u8      *update_ptr;
1939         struct  ifmultiaddr *ifma;
1940         int     mcnt = 0;
1941         struct ifnet   *ifp = adapter->ifp;
1942
1943         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1944
1945         mta = adapter->mta;
1946         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1947             MAX_NUM_MULTICAST_ADDRESSES);
1948
1949 #if __FreeBSD_version < 800000
1950         IF_ADDR_LOCK(ifp);
1951 #else
1952         if_maddr_rlock(ifp);
1953 #endif
1954         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1955                 if (ifma->ifma_addr->sa_family != AF_LINK)
1956                         continue;
1957                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1958                         break;
1959                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1960                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1961                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1962                 mcnt++;
1963         }
1964 #if __FreeBSD_version < 800000
1965         IF_ADDR_UNLOCK(ifp);
1966 #else
1967         if_maddr_runlock(ifp);
1968 #endif
1969
1970         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1971         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1972         if (ifp->if_flags & IFF_PROMISC)
1973                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1974         else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
1975             ifp->if_flags & IFF_ALLMULTI) {
1976                 fctrl |= IXGBE_FCTRL_MPE;
1977                 fctrl &= ~IXGBE_FCTRL_UPE;
1978         } else
1979                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1980         
1981         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1982
1983         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
1984                 update_ptr = mta;
1985                 ixgbe_update_mc_addr_list(&adapter->hw,
1986                     update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1987         }
1988
1989         return;
1990 }
1991
1992 /*
1993  * This is an iterator function needed by the multicast
1994  * shared code. It feeds the shared code routine the
1995  * addresses in ixgbe_set_multi()'s mta array one by one.
1996  */
1997 static u8 *
1998 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1999 {
2000         u8 *addr = *update_ptr;
2001         u8 *newptr;
2002         *vmdq = 0;
2003
2004         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2005         *update_ptr = newptr;
2006         return addr;
2007 }
2008
2009
2010 /*********************************************************************
2011  *  Timer routine
2012  *
2013  *  This routine checks for link status, updates statistics,
2014  *  and runs the watchdog check.
2015  *
2016  **********************************************************************/
2017
2018 static void
2019 ixgbe_local_timer(void *arg)
2020 {
2021         struct adapter  *adapter = arg;
2022         device_t        dev = adapter->dev;
2023         struct ix_queue *que = adapter->queues;
2024         struct tx_ring  *txr = adapter->tx_rings;
2025         int             hung = 0, paused = 0;
2026
2027         mtx_assert(&adapter->core_mtx, MA_OWNED);
2028
2029         /* Check for pluggable optics */
2030         if (adapter->sfp_probe)
2031                 if (!ixgbe_sfp_probe(adapter))
2032                         goto out; /* Nothing to do */
2033
2034         ixgbe_update_link_status(adapter);
2035         ixgbe_update_stats_counters(adapter);
2036
2037         /*
2038          * If the interface has been paused
2039          * then don't do the watchdog check
2040          */
2041         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2042                 paused = 1;
2043
2044         /*
2045         ** Check the TX queues status
2046         **      - watchdog only if all queues show hung
2047         */          
2048         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2049                 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2050                     (paused == 0))
2051                         ++hung;
2052                 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2053                         taskqueue_enqueue(que->tq, &txr->txq_task);
2054         }
2055         /* Only truly watchdog if all queues show hung */
2056         if (hung == adapter->num_queues)
2057                 goto watchdog;
2058
2059 out:
2060         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2061         return;
2062
2063 watchdog:
2064         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2065         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2066             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2067             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2068         device_printf(dev,"TX(%d) desc avail = %d,"
2069             "Next TX to Clean = %d\n",
2070             txr->me, txr->tx_avail, txr->next_to_clean);
2071         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2072         adapter->watchdog_events++;
2073         ixgbe_init_locked(adapter);
2074 }
2075
2076 /*
2077 ** Note: this routine updates the OS on the link state;
2078 **      the real check of the hardware only happens with
2079 **      a link interrupt.
2080 */
2081 static void
2082 ixgbe_update_link_status(struct adapter *adapter)
2083 {
2084         struct ifnet    *ifp = adapter->ifp;
2085         device_t dev = adapter->dev;
2086
2087
2088         if (adapter->link_up) {
2089                 if (adapter->link_active == FALSE) {
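                     /* A link_speed of 128 is IXGBE_LINK_SPEED_10GB_FULL (0x80) */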
2090                         if (bootverbose)
2091                                 device_printf(dev,"Link is up %d Gbps %s \n",
2092                                     ((adapter->link_speed == 128)? 10:1),
2093                                     "Full Duplex");
2094                         adapter->link_active = TRUE;
2095                         /* Update any Flow Control changes */
2096                         ixgbe_fc_enable(&adapter->hw);
2097                         if_link_state_change(ifp, LINK_STATE_UP);
2098                 }
2099         } else { /* Link down */
2100                 if (adapter->link_active == TRUE) {
2101                         if (bootverbose)
2102                                 device_printf(dev,"Link is Down\n");
2103                         if_link_state_change(ifp, LINK_STATE_DOWN);
2104                         adapter->link_active = FALSE;
2105                 }
2106         }
2107
2108         return;
2109 }
2110
2111
2112 /*********************************************************************
2113  *
2114  *  This routine disables all traffic on the adapter by issuing a
2115  *  global reset on the MAC and deallocates TX/RX buffers.
2116  *
2117  **********************************************************************/
2118
2119 static void
2120 ixgbe_stop(void *arg)
2121 {
2122         struct ifnet   *ifp;
2123         struct adapter *adapter = arg;
2124         struct ixgbe_hw *hw = &adapter->hw;
2125         ifp = adapter->ifp;
2126
2127         mtx_assert(&adapter->core_mtx, MA_OWNED);
2128
2129         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2130         ixgbe_disable_intr(adapter);
2131         callout_stop(&adapter->timer);
2132
2133         /* Let the stack know...*/
2134         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2135
2136         ixgbe_reset_hw(hw);
2137         hw->adapter_stopped = FALSE;
2138         ixgbe_stop_adapter(hw);
2139         if (hw->mac.type == ixgbe_mac_82599EB)
2140                 ixgbe_stop_mac_link_on_d3_82599(hw);
2141         /* Turn off the laser - noop with no optics */
2142         ixgbe_disable_tx_laser(hw);
2143
2144         /* Update the stack */
2145         adapter->link_up = FALSE;
2146         ixgbe_update_link_status(adapter);
2147
2148         /* reprogram the RAR[0] in case user changed it. */
2149         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2150
2151         return;
2152 }
2153
2154
2155 /*********************************************************************
2156  *
2157  *  Determine hardware revision.
2158  *
2159  **********************************************************************/
2160 static void
2161 ixgbe_identify_hardware(struct adapter *adapter)
2162 {
2163         device_t        dev = adapter->dev;
2164         struct ixgbe_hw *hw = &adapter->hw;
2165
2166         /* Save off the information about this board */
2167         hw->vendor_id = pci_get_vendor(dev);
2168         hw->device_id = pci_get_device(dev);
2169         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2170         hw->subsystem_vendor_id =
2171             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2172         hw->subsystem_device_id =
2173             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2174
2175         /* We need this here to set the num_segs below */
2176         ixgbe_set_mac_type(hw);
2177
2178         /* Pick up the 82599 and VF settings */
2179         if (hw->mac.type != ixgbe_mac_82598EB) {
2180                 hw->phy.smart_speed = ixgbe_smart_speed;
2181                 adapter->num_segs = IXGBE_82599_SCATTER;
2182         } else
2183                 adapter->num_segs = IXGBE_82598_SCATTER;
2184
2185         return;
2186 }
2187
2188 /*********************************************************************
2189  *
2190  *  Determine optic type
2191  *
2192  **********************************************************************/
2193 static void
2194 ixgbe_setup_optics(struct adapter *adapter)
2195 {
2196         struct ixgbe_hw *hw = &adapter->hw;
2197         int             layer;
2198
2199         layer = ixgbe_get_supported_physical_layer(hw);
2200
2201         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2202                 adapter->optics = IFM_10G_T;
2203                 return;
2204         }
2205
2206         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2207                 adapter->optics = IFM_1000_T;
2208                 return;
2209         }
2210
2211         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2212                 adapter->optics = IFM_1000_SX;
2213                 return;
2214         }
2215
2216         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2217             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2218                 adapter->optics = IFM_10G_LR;
2219                 return;
2220         }
2221
2222         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2223                 adapter->optics = IFM_10G_SR;
2224                 return;
2225         }
2226
2227         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2228                 adapter->optics = IFM_10G_TWINAX;
2229                 return;
2230         }
2231
2232         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2233             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2234                 adapter->optics = IFM_10G_CX4;
2235                 return;
2236         }
2237
2238         /* If we get here just set the default */
2239         adapter->optics = IFM_ETHER | IFM_AUTO;
2240         return;
2241 }
2242
2243 /*********************************************************************
2244  *
2245  *  Setup the Legacy or MSI Interrupt handler
2246  *
2247  **********************************************************************/
2248 static int
2249 ixgbe_allocate_legacy(struct adapter *adapter)
2250 {
2251         device_t        dev = adapter->dev;
2252         struct          ix_queue *que = adapter->queues;
2253 #ifndef IXGBE_LEGACY_TX
2254         struct tx_ring          *txr = adapter->tx_rings;
2255 #endif
2256         int             error, rid = 0;
2257
2258         /* MSI RID at 1 */
2259         if (adapter->msix == 1)
2260                 rid = 1;
2261
2262         /* We allocate a single interrupt resource */
2263         adapter->res = bus_alloc_resource_any(dev,
2264             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2265         if (adapter->res == NULL) {
2266                 device_printf(dev, "Unable to allocate bus resource: "
2267                     "interrupt\n");
2268                 return (ENXIO);
2269         }
2270
2271         /*
2272          * Try allocating a fast interrupt and the associated deferred
2273          * processing contexts.
2274          */
2275 #ifndef IXGBE_LEGACY_TX
2276         TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2277 #endif
2278         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2279         que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2280             taskqueue_thread_enqueue, &que->tq);
2281         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
2282             device_get_nameunit(adapter->dev));
2283
2284         /* Tasklets for Link, SFP and Multispeed Fiber */
2285         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2286         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2287         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2288 #ifdef IXGBE_FDIR
2289         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2290 #endif
2291         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2292             taskqueue_thread_enqueue, &adapter->tq);
2293         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2294             device_get_nameunit(adapter->dev));
2295
2296         if ((error = bus_setup_intr(dev, adapter->res,
2297             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
2298             que, &adapter->tag)) != 0) {
2299                 device_printf(dev, "Failed to register fast interrupt "
2300                     "handler: %d\n", error);
2301                 taskqueue_free(que->tq);
2302                 taskqueue_free(adapter->tq);
2303                 que->tq = NULL;
2304                 adapter->tq = NULL;
2305                 return (error);
2306         }
2307         /* For simplicity in the handlers */
2308         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2309
2310         return (0);
2311 }
2312
2313
2314 /*********************************************************************
2315  *
2316  *  Setup MSIX Interrupt resources and handlers 
2317  *
2318  **********************************************************************/
2319 static int
2320 ixgbe_allocate_msix(struct adapter *adapter)
2321 {
2322         device_t        dev = adapter->dev;
2323         struct          ix_queue *que = adapter->queues;
2324         struct          tx_ring *txr = adapter->tx_rings;
2325         int             error, rid, vector = 0;
2326
2327         for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
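                     /* MSIX interrupt resource IDs start at 1 (0 is the legacy INTx RID) */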
2328                 rid = vector + 1;
2329                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2330                     RF_SHAREABLE | RF_ACTIVE);
2331                 if (que->res == NULL) {
2332                         device_printf(dev,"Unable to allocate"
2333                             " bus resource: que interrupt [%d]\n", vector);
2334                         return (ENXIO);
2335                 }
2336                 /* Set the handler function */
2337                 error = bus_setup_intr(dev, que->res,
2338                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2339                     ixgbe_msix_que, que, &que->tag);
2340                 if (error) {
2341                         que->res = NULL;
2342                         device_printf(dev, "Failed to register QUE handler");
2343                         return (error);
2344                 }
2345 #if __FreeBSD_version >= 800504
2346                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2347 #endif
2348                 que->msix = vector;
2349                 adapter->que_mask |= (u64)1 << que->msix;
2350                 /*
2351                 ** Bind the msix vector, and thus the
2352                 ** ring to the corresponding cpu.
2353                 */
2354                 if (adapter->num_queues > 1)
2355                         bus_bind_intr(dev, que->res, i);
2356
2357 #ifndef IXGBE_LEGACY_TX
2358                 TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2359 #endif
2360                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2361                 que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2362                     taskqueue_thread_enqueue, &que->tq);
2363                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2364                     device_get_nameunit(adapter->dev));
2365         }
2366
2367         /* and Link */
2368         rid = vector + 1;
2369         adapter->res = bus_alloc_resource_any(dev,
2370             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2371         if (!adapter->res) {
2372                 device_printf(dev,"Unable to allocate"
2373             " bus resource: Link interrupt [%d]\n", rid);
2374                 return (ENXIO);
2375         }
2376         /* Set the link handler function */
2377         error = bus_setup_intr(dev, adapter->res,
2378             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2379             ixgbe_msix_link, adapter, &adapter->tag);
2380         if (error) {
2381                 adapter->res = NULL;
2382                 device_printf(dev, "Failed to register LINK handler");
2383                 return (error);
2384         }
2385 #if __FreeBSD_version >= 800504
2386         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2387 #endif
2388         adapter->linkvec = vector;
2389         /* Tasklets for Link, SFP and Multispeed Fiber */
2390         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2391         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2392         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2393 #ifdef IXGBE_FDIR
2394         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2395 #endif
2396         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2397             taskqueue_thread_enqueue, &adapter->tq);
2398         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2399             device_get_nameunit(adapter->dev));
2400
2401         return (0);
2402 }
2403
2404 /*
2405  * Setup Either MSI/X or MSI
2406  */
2407 static int
2408 ixgbe_setup_msix(struct adapter *adapter)
2409 {
2410         device_t dev = adapter->dev;
2411         int rid, want, queues, msgs;
2412
2413         /* Override by tuneable */
2414         if (ixgbe_enable_msix == 0)
2415                 goto msi;
2416
2417         /* First try MSI/X */
2418         msgs = pci_msix_count(dev); 
2419         if (msgs == 0)
2420                 goto msi;
2421         rid = PCIR_BAR(MSIX_82598_BAR);
2422         adapter->msix_mem = bus_alloc_resource_any(dev,
2423             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2424         if (adapter->msix_mem == NULL) {
2425                 rid += 4;       /* 82599 maps in higher BAR */
2426                 adapter->msix_mem = bus_alloc_resource_any(dev,
2427                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2428         }
2429         if (adapter->msix_mem == NULL) {
2430                 /* May not be enabled */
2431                 device_printf(adapter->dev,
2432                     "Unable to map MSIX table \n");
2433                 goto msi;
2434         }
2435
2436         /* Figure out a reasonable auto config value */
2437         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2438
2439         if (ixgbe_num_queues != 0)
2440                 queues = ixgbe_num_queues;
2441         /* Set max queues to 8 when autoconfiguring */
2442         else if ((ixgbe_num_queues == 0) && (queues > 8))
2443                 queues = 8;
2444
2445         /*
2446         ** Want one vector (RX/TX pair) per queue
2447         ** plus an additional for Link.
2448         */
2449         want = queues + 1;
2450         if (msgs >= want)
2451                 msgs = want;
2452         else {
2453                 device_printf(adapter->dev,
2454                     "MSIX Configuration Problem, "
2455                     "%d vectors but %d queues wanted!\n",
2456                     msgs, want);
2457                 goto msi;
2458         }
2459         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2460                 device_printf(adapter->dev,
2461                     "Using MSIX interrupts with %d vectors\n", msgs);
2462                 adapter->num_queues = queues;
2463                 return (msgs);
2464         }
2465         /*
2466         ** If MSIX alloc failed or provided us with
2467         ** less than needed, free and fall through to MSI
2468         */
2469         pci_release_msi(dev);
2470
2471 msi:
2472         if (adapter->msix_mem != NULL) {
2473                 bus_release_resource(dev, SYS_RES_MEMORY,
2474                     rid, adapter->msix_mem);
2475                 adapter->msix_mem = NULL;
2476         }
2477         msgs = 1;
2478         if (pci_alloc_msi(dev, &msgs) == 0) {
2479                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2480                 return (msgs);
2481         }
2482         device_printf(adapter->dev,"Using a Legacy interrupt\n");
2483         return (0);
2484 }
2485
2486
2487 static int
2488 ixgbe_allocate_pci_resources(struct adapter *adapter)
2489 {
2490         int             rid;
2491         device_t        dev = adapter->dev;
2492
2493         rid = PCIR_BAR(0);
2494         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2495             &rid, RF_ACTIVE);
2496
2497         if (!(adapter->pci_mem)) {
2498                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2499                 return (ENXIO);
2500         }
2501
2502         adapter->osdep.mem_bus_space_tag =
2503                 rman_get_bustag(adapter->pci_mem);
2504         adapter->osdep.mem_bus_space_handle =
2505                 rman_get_bushandle(adapter->pci_mem);
2506         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2507
2508         /* Legacy defaults */
2509         adapter->num_queues = 1;
2510         adapter->hw.back = &adapter->osdep;
2511
2512         /*
2513         ** Now setup MSI or MSI/X, should
2514         ** return us the number of supported
2515         ** vectors. (Will be 1 for MSI)
2516         */
2517         adapter->msix = ixgbe_setup_msix(adapter);
2518         return (0);
2519 }
2520
2521 static void
2522 ixgbe_free_pci_resources(struct adapter * adapter)
2523 {
2524         struct          ix_queue *que = adapter->queues;
2525         device_t        dev = adapter->dev;
2526         int             rid, memrid;
2527
2528         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2529                 memrid = PCIR_BAR(MSIX_82598_BAR);
2530         else
2531                 memrid = PCIR_BAR(MSIX_82599_BAR);
2532
2533         /*
2534         ** There is a slight possibility of a failure mode
2535         ** in attach that will result in entering this function
2536         ** before interrupt resources have been initialized, and
2537         ** in that case we do not want to execute the loops below.
2538         ** We can detect this reliably by the state of the adapter
2539         ** res pointer.
2540         */
2541         if (adapter->res == NULL)
2542                 goto mem;
2543
2544         /*
2545         **  Release all msix queue resources:
2546         */
2547         for (int i = 0; i < adapter->num_queues; i++, que++) {
2548                 rid = que->msix + 1;
2549                 if (que->tag != NULL) {
2550                         bus_teardown_intr(dev, que->res, que->tag);
2551                         que->tag = NULL;
2552                 }
2553                 if (que->res != NULL)
2554                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2555         }
2556
2557
2558         /* Clean the Legacy or Link interrupt last */
2559         if (adapter->linkvec) /* we are doing MSIX */
2560                 rid = adapter->linkvec + 1;
2561         else
2562                 rid = (adapter->msix != 0) ? 1 : 0;
2563
2564         if (adapter->tag != NULL) {
2565                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2566                 adapter->tag = NULL;
2567         }
2568         if (adapter->res != NULL)
2569                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2570
2571 mem:
2572         if (adapter->msix)
2573                 pci_release_msi(dev);
2574
2575         if (adapter->msix_mem != NULL)
2576                 bus_release_resource(dev, SYS_RES_MEMORY,
2577                     memrid, adapter->msix_mem);
2578
2579         if (adapter->pci_mem != NULL)
2580                 bus_release_resource(dev, SYS_RES_MEMORY,
2581                     PCIR_BAR(0), adapter->pci_mem);
2582
2583         return;
2584 }
2585
2586 /*********************************************************************
2587  *
2588  *  Setup networking device structure and register an interface.
2589  *
2590  **********************************************************************/
2591 static int
2592 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2593 {
2594         struct ixgbe_hw *hw = &adapter->hw;
2595         struct ifnet   *ifp;
2596
2597         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2598
2599         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2600         if (ifp == NULL) {
2601                 device_printf(dev, "can not allocate ifnet structure\n");
2602                 return (-1);
2603         }
2604         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2605 #if __FreeBSD_version < 1000025
2606         ifp->if_baudrate = 1000000000;
2607 #else
2608         if_initbaudrate(ifp, IF_Gbps(10));
2609 #endif
2610         ifp->if_init = ixgbe_init;
2611         ifp->if_softc = adapter;
2612         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2613         ifp->if_ioctl = ixgbe_ioctl;
2614 #ifndef IXGBE_LEGACY_TX
2615         ifp->if_transmit = ixgbe_mq_start;
2616         ifp->if_qflush = ixgbe_qflush;
2617 #else
2618         ifp->if_start = ixgbe_start;
2619         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2620         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2621         IFQ_SET_READY(&ifp->if_snd);
2622 #endif
2623
2624         ether_ifattach(ifp, adapter->hw.mac.addr);
2625
2626         adapter->max_frame_size =
2627             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2628
2629         /*
2630          * Tell the upper layer(s) we support long frames.
2631          */
2632         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2633
2634         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2635         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2636         ifp->if_capabilities |= IFCAP_LRO;
2637         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2638                              |  IFCAP_VLAN_HWTSO
2639                              |  IFCAP_VLAN_MTU;
2640         ifp->if_capenable = ifp->if_capabilities;
2641
2642         /*
2643         ** Don't turn this on by default, if vlans are
2644         ** created on another pseudo device (eg. lagg)
2645         ** then vlan events are not passed thru, breaking
2646         ** operation, but with HW FILTER off it works. If
2647         ** using vlans directly on the ixgbe driver you can
2648         ** enable this and get full hardware tag filtering.
2649         */
2650         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
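             /* Added after capenable was copied above, so it is advertised but off */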
2651
2652         /*
2653          * Specify the media types supported by this adapter and register
2654          * callbacks to update media and link information
2655          */
2656         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2657                      ixgbe_media_status);
2658         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2659         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2660         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2661                 ifmedia_add(&adapter->media,
2662                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2663                 ifmedia_add(&adapter->media,
2664                     IFM_ETHER | IFM_1000_T, 0, NULL);
2665         }
2666         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2667         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2668
2669         return (0);
2670 }
2671
2672 static void
2673 ixgbe_config_link(struct adapter *adapter)
2674 {
2675         struct ixgbe_hw *hw = &adapter->hw;
2676         u32     autoneg, err = 0;
2677         bool    sfp, negotiate;
2678
2679         sfp = ixgbe_is_sfp(hw);
2680
2681         if (sfp) { 
2682                 if (hw->phy.multispeed_fiber) {
2683                         hw->mac.ops.setup_sfp(hw);
2684                         ixgbe_enable_tx_laser(hw);
2685                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2686                 } else
2687                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2688         } else {
2689                 if (hw->mac.ops.check_link)
2690                         err = ixgbe_check_link(hw, &adapter->link_speed,
2691                             &adapter->link_up, FALSE);
2692                 if (err)
2693                         goto out;
2694                 autoneg = hw->phy.autoneg_advertised;
2695                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2696                         err  = hw->mac.ops.get_link_capabilities(hw,
2697                             &autoneg, &negotiate);
2698                 if (err)
2699                         goto out;
2700                 if (hw->mac.ops.setup_link)
2701                         err = hw->mac.ops.setup_link(hw,
2702                             autoneg, adapter->link_up);
2703         }
2704 out:
2705         return;
2706 }
2707
2708 /********************************************************************
2709  * Manage DMA'able memory.
2710  *******************************************************************/
2711 static void
2712 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2713 {
2714         if (error)
2715                 return;
2716         *(bus_addr_t *) arg = segs->ds_addr;
2717         return;
2718 }
2719
2720 static int
2721 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2722                 struct ixgbe_dma_alloc *dma, int mapflags)
2723 {
2724         device_t dev = adapter->dev;
2725         int             r;
2726
2727         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2728                                DBA_ALIGN, 0,    /* alignment, bounds */
2729                                BUS_SPACE_MAXADDR,       /* lowaddr */
2730                                BUS_SPACE_MAXADDR,       /* highaddr */
2731                                NULL, NULL,      /* filter, filterarg */
2732                                size,    /* maxsize */
2733                                1,       /* nsegments */
2734                                size,    /* maxsegsize */
2735                                BUS_DMA_ALLOCNOW,        /* flags */
2736                                NULL,    /* lockfunc */
2737                                NULL,    /* lockfuncarg */
2738                                &dma->dma_tag);
2739         if (r != 0) {
2740                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2741                        "error %u\n", r);
2742                 goto fail_0;
2743         }
2744         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2745                              BUS_DMA_NOWAIT, &dma->dma_map);
2746         if (r != 0) {
2747                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2748                        "error %u\n", r);
2749                 goto fail_1;
2750         }
2751         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2752                             size,
2753                             ixgbe_dmamap_cb,
2754                             &dma->dma_paddr,
2755                             mapflags | BUS_DMA_NOWAIT);
2756         if (r != 0) {
2757                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2758                        "error %u\n", r);
2759                 goto fail_2;
2760         }
2761         dma->dma_size = size;
2762         return (0);
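     /* Unwind in the reverse order of setup on failure */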
2763 fail_2:
2764         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2765 fail_1:
2766         bus_dma_tag_destroy(dma->dma_tag);
2767 fail_0:
2768         dma->dma_map = NULL;
2769         dma->dma_tag = NULL;
2770         return (r);
2771 }
2772
2773 static void
2774 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2775 {
2776         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2777             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2778         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2779         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2780         bus_dma_tag_destroy(dma->dma_tag);
2781 }
2782
2783
2784 /*********************************************************************
2785  *
2786  *  Allocate memory for the transmit and receive rings, and then
2787  *  the descriptors associated with each, called only once at attach.
2788  *
2789  **********************************************************************/
2790 static int
2791 ixgbe_allocate_queues(struct adapter *adapter)
2792 {
2793         device_t        dev = adapter->dev;
2794         struct ix_queue *que;
2795         struct tx_ring  *txr;
2796         struct rx_ring  *rxr;
2797         int rsize, tsize, error = IXGBE_SUCCESS;
2798         int txconf = 0, rxconf = 0;
2799
2800         /* First allocate the top level queue structs */
2801         if (!(adapter->queues =
2802             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2803             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2804                 device_printf(dev, "Unable to allocate queue memory\n");
2805                 error = ENOMEM;
2806                 goto fail;
2807         }
2808
2809         /* Next allocate the TX ring struct memory */
2810         if (!(adapter->tx_rings =
2811             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2812             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2813                 device_printf(dev, "Unable to allocate TX ring memory\n");
2814                 error = ENOMEM;
2815                 goto tx_fail;
2816         }
2817
2818         /* Next allocate the RX */
2819         if (!(adapter->rx_rings =
2820             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2821             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2822                 device_printf(dev, "Unable to allocate RX ring memory\n");
2823                 error = ENOMEM;
2824                 goto rx_fail;
2825         }
2826
2827         /* For the ring itself */
2828         tsize = roundup2(adapter->num_tx_desc *
2829             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2830
2831         /*
2832          * Now set up the TX queues, txconf is needed to handle the
2833          * possibility that things fail midcourse and we need to
2834          * undo memory gracefully
2835          */ 
2836         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2837                 /* Set up some basics */
2838                 txr = &adapter->tx_rings[i];
2839                 txr->adapter = adapter;
2840                 txr->me = i;
2841                 txr->num_desc = adapter->num_tx_desc;
2842
2843                 /* Initialize the TX side lock */
2844                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2845                     device_get_nameunit(dev), txr->me);
2846                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2847
2848                 if (ixgbe_dma_malloc(adapter, tsize,
2849                         &txr->txdma, BUS_DMA_NOWAIT)) {
2850                         device_printf(dev,
2851                             "Unable to allocate TX Descriptor memory\n");
2852                         error = ENOMEM;
2853                         goto err_tx_desc;
2854                 }
2855                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2856                 bzero((void *)txr->tx_base, tsize);
2857
2858                 /* Now allocate transmit buffers for the ring */
2859                 if (ixgbe_allocate_transmit_buffers(txr)) {
2860                         device_printf(dev,
2861                             "Critical Failure setting up transmit buffers\n");
2862                         error = ENOMEM;
2863                         goto err_tx_desc;
2864                 }
2865 #ifndef IXGBE_LEGACY_TX
2866                 /* Allocate a buf ring */
2867                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2868                     M_WAITOK, &txr->tx_mtx);
2869                 if (txr->br == NULL) {
2870                         device_printf(dev,
2871                             "Critical Failure setting up buf ring\n");
2872                         error = ENOMEM;
2873                         goto err_tx_desc;
2874                 }
2875 #endif
2876         }
2877
2878         /*
2879          * Next the RX queues...
2880          */ 
2881         rsize = roundup2(adapter->num_rx_desc *
2882             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2883         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2884                 rxr = &adapter->rx_rings[i];
2885                 /* Set up some basics */
2886                 rxr->adapter = adapter;
2887                 rxr->me = i;
2888                 rxr->num_desc = adapter->num_rx_desc;
2889
2890                 /* Initialize the RX side lock */
2891                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2892                     device_get_nameunit(dev), rxr->me);
2893                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2894
2895                 if (ixgbe_dma_malloc(adapter, rsize,
2896                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2897                         device_printf(dev,
2898                             "Unable to allocate RX Descriptor memory\n");
2899                         error = ENOMEM;
2900                         goto err_rx_desc;
2901                 }
2902                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2903                 bzero((void *)rxr->rx_base, rsize);
2904
2905                 /* Allocate receive buffers for the ring */
2906                 if (ixgbe_allocate_receive_buffers(rxr)) {
2907                         device_printf(dev,
2908                             "Critical Failure setting up receive buffers\n");
2909                         error = ENOMEM;
2910                         goto err_rx_desc;
2911                 }
2912         }
2913
2914         /*
2915         ** Finally set up the queue holding structs
2916         */
2917         for (int i = 0; i < adapter->num_queues; i++) {
2918                 que = &adapter->queues[i];
2919                 que->adapter = adapter;
2920                 que->txr = &adapter->tx_rings[i];
2921                 que->rxr = &adapter->rx_rings[i];
2922         }
2923
2924         return (0);
2925
2926 err_rx_desc:
2927         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2928                 ixgbe_dma_free(adapter, &rxr->rxdma);
2929 err_tx_desc:
2930         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2931                 ixgbe_dma_free(adapter, &txr->txdma);
2932         free(adapter->rx_rings, M_DEVBUF);
2933 rx_fail:
2934         free(adapter->tx_rings, M_DEVBUF);
2935 tx_fail:
2936         free(adapter->queues, M_DEVBUF);
2937 fail:
2938         return (error);
2939 }
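/*
 * The txconf/rxconf counters above implement a common count-and-unwind
 * idiom for partial allocation failure: count each success, and on
 * failure free exactly that many.  A minimal sketch of the pattern,
 * assuming hypothetical alloc_one()/free_one() helpers (not driver
 * functions):
 */
#if 0
	int n, made = 0, error = 0;

	for (n = 0; n < count; n++, made++) {
		if (alloc_one(n) != 0) {
			error = ENOMEM;
			goto undo;
		}
	}
	return (0);
undo:
	while (made-- > 0)	/* free only what was allocated */
		free_one(made);
	return (error);
#endif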
2940
2941 /*********************************************************************
2942  *
2943  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2944  *  the information needed to transmit a packet on the wire. This is
2945  *  called only once at attach, setup is done every reset.
2946  *
2947  **********************************************************************/
2948 static int
2949 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2950 {
2951         struct adapter *adapter = txr->adapter;
2952         device_t dev = adapter->dev;
2953         struct ixgbe_tx_buf *txbuf;
2954         int error, i;
2955
2956         /*
2957          * Setup DMA descriptor areas.
2958          */
2959         if ((error = bus_dma_tag_create(
2960                                bus_get_dma_tag(adapter->dev),   /* parent */
2961                                1, 0,            /* alignment, bounds */
2962                                BUS_SPACE_MAXADDR,       /* lowaddr */
2963                                BUS_SPACE_MAXADDR,       /* highaddr */
2964                                NULL, NULL,              /* filter, filterarg */
2965                                IXGBE_TSO_SIZE,          /* maxsize */
2966                                adapter->num_segs,       /* nsegments */
2967                                PAGE_SIZE,               /* maxsegsize */
2968                                0,                       /* flags */
2969                                NULL,                    /* lockfunc */
2970                                NULL,                    /* lockfuncarg */
2971                                &txr->txtag))) {
2972                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2973                 goto fail;
2974         }
2975
2976         if (!(txr->tx_buffers =
2977             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
2978             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2979                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2980                 error = ENOMEM;
2981                 goto fail;
2982         }
2983
2984         /* Create the descriptor buffer dma maps */
2985         txbuf = txr->tx_buffers;
2986         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2987                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2988                 if (error != 0) {
2989                         device_printf(dev, "Unable to create TX DMA map\n");
2990                         goto fail;
2991                 }
2992         }
2993
2994         return 0;
2995 fail:
2996         /* Free everything; this handles the case where we failed partway */
2997         ixgbe_free_transmit_structures(adapter);
2998         return (error);
2999 }
3000
3001 /*********************************************************************
3002  *
3003  *  Initialize a transmit ring.
3004  *
3005  **********************************************************************/
3006 static void
3007 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3008 {
3009         struct adapter *adapter = txr->adapter;
3010         struct ixgbe_tx_buf *txbuf;
3011         int i;
3012 #ifdef DEV_NETMAP
3013         struct netmap_adapter *na = NA(adapter->ifp);
3014         struct netmap_slot *slot;
3015 #endif /* DEV_NETMAP */
3016
3017         /* Clear the old ring contents */
3018         IXGBE_TX_LOCK(txr);
3019 #ifdef DEV_NETMAP
3020         /*
3021          * (under lock): if in netmap mode, do some consistency
3022          * checks and set slot to entry 0 of the netmap ring.
3023          */
3024         slot = netmap_reset(na, NR_TX, txr->me, 0);
3025 #endif /* DEV_NETMAP */
3026         bzero((void *)txr->tx_base,
3027               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3028         /* Reset indices */
3029         txr->next_avail_desc = 0;
3030         txr->next_to_clean = 0;
3031
3032         /* Free any existing tx buffers. */
3033         txbuf = txr->tx_buffers;
3034         for (i = 0; i < txr->num_desc; i++, txbuf++) {
3035                 if (txbuf->m_head != NULL) {
3036                         bus_dmamap_sync(txr->txtag, txbuf->map,
3037                             BUS_DMASYNC_POSTWRITE);
3038                         bus_dmamap_unload(txr->txtag, txbuf->map);
3039                         m_freem(txbuf->m_head);
3040                         txbuf->m_head = NULL;
3041                 }
3042 #ifdef DEV_NETMAP
3043                 /*
3044                  * In netmap mode, set the map for the packet buffer.
3045                  * NOTE: Some drivers (not this one) also need to set
3046                  * the physical buffer address in the NIC ring.
3047                  * Slots in the netmap ring (indexed by "si") are
3048                  * kring->nkr_hwofs positions "ahead" wrt the
3049                  * corresponding slot in the NIC ring. In some drivers
3050                  * (not here) nkr_hwofs can be negative. Function
3051                  * netmap_idx_n2k() handles wraparounds properly.
3052                  */
3053                 if (slot) {
3054                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3055                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3056                 }
3057 #endif /* DEV_NETMAP */
3058                 /* Clear the EOP descriptor pointer */
3059                 txbuf->eop = NULL;
3060         }
3061
3062 #ifdef IXGBE_FDIR
3063         /* Set the rate at which we sample packets */
3064         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3065                 txr->atr_sample = atr_sample_rate;
3066 #endif
3067
3068         /* Set number of descriptors available */
3069         txr->tx_avail = adapter->num_tx_desc;
3070
3071         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3072             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3073         IXGBE_TX_UNLOCK(txr);
3074 }
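/*
 * The netmap_idx_n2k() translation used above is, conceptually, a
 * modular shift of the NIC ring index by the ring offset.  A simplified
 * sketch, assuming a non-negative offset (the real helper also handles
 * negative nkr_hwofs and is the authoritative version):
 */
#if 0
static int
nic_to_netmap_idx(int idx, int hwofs, int num_slots)
{
	int si = idx + hwofs;

	if (si >= num_slots)
		si -= num_slots;	/* wrap back into the ring */
	return (si);
}
#endif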
3075
3076 /*********************************************************************
3077  *
3078  *  Initialize all transmit rings.
3079  *
3080  **********************************************************************/
3081 static int
3082 ixgbe_setup_transmit_structures(struct adapter *adapter)
3083 {
3084         struct tx_ring *txr = adapter->tx_rings;
3085
3086         for (int i = 0; i < adapter->num_queues; i++, txr++)
3087                 ixgbe_setup_transmit_ring(txr);
3088
3089         return (0);
3090 }
3091
3092 /*********************************************************************
3093  *
3094  *  Enable transmit unit.
3095  *
3096  **********************************************************************/
3097 static void
3098 ixgbe_initialize_transmit_units(struct adapter *adapter)
3099 {
3100         struct tx_ring  *txr = adapter->tx_rings;
3101         struct ixgbe_hw *hw = &adapter->hw;
3102
3103         /* Setup the Base and Length of the Tx Descriptor Ring */
3104
3105         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3106                 u64     tdba = txr->txdma.dma_paddr;
3107                 u32     txctrl;
3108
3109                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3110                        (tdba & 0x00000000ffffffffULL));
3111                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3112                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3113                     adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3114
3115                 /* Setup the HW Tx Head and Tail descriptor pointers */
3116                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3117                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3118
3119                 /* Setup Transmit Descriptor Cmd Settings */
3120                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3121                 txr->queue_status = IXGBE_QUEUE_IDLE;
3122
3123                 /* Set the processing limit */
3124                 txr->process_limit = ixgbe_tx_process_limit;
3125
3126                 /* Disable Head Writeback */
3127                 switch (hw->mac.type) {
3128                 case ixgbe_mac_82598EB:
3129                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3130                         break;
3131                 case ixgbe_mac_82599EB:
3132                 case ixgbe_mac_X540:
3133                 default:
3134                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3135                         break;
3136                 }
3137                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3138                 switch (hw->mac.type) {
3139                 case ixgbe_mac_82598EB:
3140                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3141                         break;
3142                 case ixgbe_mac_82599EB:
3143                 case ixgbe_mac_X540:
3144                 default:
3145                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3146                         break;
3147                 }
3148
3149         }
3150
3151         if (hw->mac.type != ixgbe_mac_82598EB) {
3152                 u32 dmatxctl, rttdcs;
3153                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3154                 dmatxctl |= IXGBE_DMATXCTL_TE;
3155                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3156                 /* Disable arbiter to set MTQC */
3157                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3158                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3159                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3160                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3161                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3162                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3163         }
3164
3165         return;
3166 }
3167
3168 /*********************************************************************
3169  *
3170  *  Free all transmit rings.
3171  *
3172  **********************************************************************/
3173 static void
3174 ixgbe_free_transmit_structures(struct adapter *adapter)
3175 {
3176         struct tx_ring *txr = adapter->tx_rings;
3177
3178         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3179                 IXGBE_TX_LOCK(txr);
3180                 ixgbe_free_transmit_buffers(txr);
3181                 ixgbe_dma_free(adapter, &txr->txdma);
3182                 IXGBE_TX_UNLOCK(txr);
3183                 IXGBE_TX_LOCK_DESTROY(txr);
3184         }
3185         free(adapter->tx_rings, M_DEVBUF);
3186 }
3187
3188 /*********************************************************************
3189  *
3190  *  Free transmit ring related data structures.
3191  *
3192  **********************************************************************/
3193 static void
3194 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3195 {
3196         struct adapter *adapter = txr->adapter;
3197         struct ixgbe_tx_buf *tx_buffer;
3198         int             i;
3199
3200         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
3201
3202         if (txr->tx_buffers == NULL)
3203                 return;
3204
3205         tx_buffer = txr->tx_buffers;
3206         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3207                 if (tx_buffer->m_head != NULL) {
3208                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3209                             BUS_DMASYNC_POSTWRITE);
3210                         bus_dmamap_unload(txr->txtag,
3211                             tx_buffer->map);
3212                         m_freem(tx_buffer->m_head);
3213                         tx_buffer->m_head = NULL;
3214                         if (tx_buffer->map != NULL) {
3215                                 bus_dmamap_destroy(txr->txtag,
3216                                     tx_buffer->map);
3217                                 tx_buffer->map = NULL;
3218                         }
3219                 } else if (tx_buffer->map != NULL) {
3220                         bus_dmamap_unload(txr->txtag,
3221                             tx_buffer->map);
3222                         bus_dmamap_destroy(txr->txtag,
3223                             tx_buffer->map);
3224                         tx_buffer->map = NULL;
3225                 }
3226         }
3227 #ifndef IXGBE_LEGACY_TX /* the buf ring exists only in the non-legacy path */
3228         if (txr->br != NULL)
3229                 buf_ring_free(txr->br, M_DEVBUF);
3230 #endif
3231         if (txr->tx_buffers != NULL) {
3232                 free(txr->tx_buffers, M_DEVBUF);
3233                 txr->tx_buffers = NULL;
3234         }
3235         if (txr->txtag != NULL) {
3236                 bus_dma_tag_destroy(txr->txtag);
3237                 txr->txtag = NULL;
3238         }
3239         return;
3240 }
3241
3242 /*********************************************************************
3243  *
3244  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3245  *
3246  **********************************************************************/
3247
3248 static int
3249 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3250     u32 *cmd_type_len, u32 *olinfo_status)
3251 {
3252         struct ixgbe_adv_tx_context_desc *TXD;
3253         struct ether_vlan_header *eh;
3254         struct ip *ip;
3255         struct ip6_hdr *ip6;
3256         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3257         int     ehdrlen, ip_hlen = 0;
3258         u16     etype;
3259         u8      ipproto = 0;
3260         int     offload = TRUE;
3261         int     ctxd = txr->next_avail_desc;
3262         u16     vtag = 0;
3263
3264         /* First check if TSO is to be used */
3265         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3266                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3267
3268         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3269                 offload = FALSE;
3270
3271         /* Indicate the whole packet as payload when not doing TSO */
3272         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3273
3274         /* Now ready a context descriptor */
3275         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3276
3277         /*
3278         ** In advanced descriptors the vlan tag must 
3279         ** be placed into the context descriptor. Hence
3280         ** we need to make one even if not doing offloads.
3281         */
3282         if (mp->m_flags & M_VLANTAG) {
3283                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3284                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3285         } else if (offload == FALSE) /* ... no offload to do */
3286                 return (0);
3287
3288         /*
3289          * Determine where frame payload starts.
3290          * Jump over vlan headers if already present,
3291          * helpful for QinQ too.
3292          */
3293         eh = mtod(mp, struct ether_vlan_header *);
3294         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3295                 etype = ntohs(eh->evl_proto);
3296                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3297         } else {
3298                 etype = ntohs(eh->evl_encap_proto);
3299                 ehdrlen = ETHER_HDR_LEN;
3300         }
3301
3302         /* Set the ether header length */
3303         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3304
3305         switch (etype) {
3306                 case ETHERTYPE_IP:
3307                         ip = (struct ip *)(mp->m_data + ehdrlen);
3308                         ip_hlen = ip->ip_hl << 2;
3309                         ipproto = ip->ip_p;
3310                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3311                         break;
3312                 case ETHERTYPE_IPV6:
3313                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3314                         ip_hlen = sizeof(struct ip6_hdr);
3315                         /* XXX-BZ this will go badly in case of ext hdrs. */
3316                         ipproto = ip6->ip6_nxt;
3317                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3318                         break;
3319                 default:
3320                         offload = FALSE;
3321                         break;
3322         }
3323
3324         vlan_macip_lens |= ip_hlen;
3325         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3326
3327         switch (ipproto) {
3328                 case IPPROTO_TCP:
3329                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3330                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3331                         break;
3332
3333                 case IPPROTO_UDP:
3334                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3335                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3336                         break;
3337
3338 #if __FreeBSD_version >= 800000
3339                 case IPPROTO_SCTP:
3340                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3341                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3342                         break;
3343 #endif
3344                 default:
3345                         offload = FALSE;
3346                         break;
3347         }
3348
3349         if (offload) /* For the TX descriptor setup */
3350                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3351
3352         /* Now copy bits into descriptor */
3353         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3354         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3355         TXD->seqnum_seed = htole32(0);
3356         TXD->mss_l4len_idx = htole32(0);
3357
3358         /* We've consumed the first desc, adjust counters */
3359         if (++ctxd == txr->num_desc)
3360                 ctxd = 0;
3361         txr->next_avail_desc = ctxd;
3362         --txr->tx_avail;
3363
3364         return (0);
3365 }
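/*
 * Usage note: ixgbe_tx_ctx_setup() only ORs offload bits into the
 * caller's cmd_type_len/olinfo_status words; the transmit encap path
 * then stamps those words into every data descriptor of the packet.
 * A condensed sketch of that calling pattern (simplified, not the
 * verbatim encap code):
 */
#if 0
	u32 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DCMD_IFCS;
	u32 olinfo_status = 0;

	if (ixgbe_tx_ctx_setup(txr, mp, &cmd_type_len, &olinfo_status))
		return (ENOBUFS);	/* illustrative error handling */
	/* ...build data descriptors using cmd_type_len/olinfo_status... */
#endif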
3366
3367 /**********************************************************************
3368  *
3369  *  Setup work for hardware segmentation offload (TSO) on
3370  *  adapters using advanced tx descriptors
3371  *
3372  **********************************************************************/
3373 static int
3374 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3375     u32 *cmd_type_len, u32 *olinfo_status)
3376 {
3377         struct ixgbe_adv_tx_context_desc *TXD;
3378         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3379         u32 mss_l4len_idx = 0, paylen;
3380         u16 vtag = 0, eh_type;
3381         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3382         struct ether_vlan_header *eh;
3383 #ifdef INET6
3384         struct ip6_hdr *ip6;
3385 #endif
3386 #ifdef INET
3387         struct ip *ip;
3388 #endif
3389         struct tcphdr *th;
3390
3391
3392         /*
3393          * Determine where frame payload starts.
3394          * Jump over vlan headers if already present
3395          */
3396         eh = mtod(mp, struct ether_vlan_header *);
3397         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3398                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3399                 eh_type = eh->evl_proto;
3400         } else {
3401                 ehdrlen = ETHER_HDR_LEN;
3402                 eh_type = eh->evl_encap_proto;
3403         }
3404
3405         switch (ntohs(eh_type)) {
3406 #ifdef INET6
3407         case ETHERTYPE_IPV6:
3408                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3409                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3410                 if (ip6->ip6_nxt != IPPROTO_TCP)
3411                         return (ENXIO);
3412                 ip_hlen = sizeof(struct ip6_hdr);
3414                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3415                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3416                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3417                 break;
3418 #endif
3419 #ifdef INET
3420         case ETHERTYPE_IP:
3421                 ip = (struct ip *)(mp->m_data + ehdrlen);
3422                 if (ip->ip_p != IPPROTO_TCP)
3423                         return (ENXIO);
3424                 ip->ip_sum = 0;
3425                 ip_hlen = ip->ip_hl << 2;
3426                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3427                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3428                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3429                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3430                 /* Tell transmit desc to also do IPv4 checksum. */
3431                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3432                 break;
3433 #endif
3434         default:
3435                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3436                     __func__, ntohs(eh_type));
3437                 break;
3438         }
3439
3440         ctxd = txr->next_avail_desc;
3441         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3442
3443         tcp_hlen = th->th_off << 2;
3444
3445         /* This is used in the transmit desc in encap */
3446         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3447
3448         /* VLAN MACLEN IPLEN */
3449         if (mp->m_flags & M_VLANTAG) {
3450                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3451                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3452         }
3453
3454         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3455         vlan_macip_lens |= ip_hlen;
3456         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3457
3458         /* ADV DTYPE TUCMD */
3459         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3460         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3461         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3462
3463         /* MSS L4LEN IDX */
3464         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3465         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3466         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3467
3468         TXD->seqnum_seed = htole32(0);
3469
3470         if (++ctxd == txr->num_desc)
3471                 ctxd = 0;
3472
3473         txr->tx_avail--;
3474         txr->next_avail_desc = ctxd;
3475         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3476         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3477         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3478         ++txr->tso_tx;
3479         return (0);
3480 }
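/*
 * Worked example of the TSO length math above: for TCP/IPv4 with no
 * VLAN tag and no IP or TCP options, ehdrlen = 14, ip_hlen = 20 and
 * tcp_hlen = 20, so a 65000-byte m_pkthdr.len gives
 * paylen = 65000 - 14 - 20 - 20 = 64946 bytes of TCP payload for the
 * hardware to slice into tso_segsz-sized segments.
 */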
3481
3482 #ifdef IXGBE_FDIR
3483 /*
3484 ** This routine parses packet headers so that Flow
3485 ** Director can make a hashed filter table entry 
3486 ** allowing traffic flows to be identified and kept
3487 ** on the same cpu.  Doing this for every packet
3488 ** would be a performance hit, so we only sample
3489 ** one in IXGBE_FDIR_RATE packets.
3490 */
3491 static void
3492 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3493 {
3494         struct adapter                  *adapter = txr->adapter;
3495         struct ix_queue                 *que;
3496         struct ip                       *ip;
3497         struct tcphdr                   *th;
3498         struct udphdr                   *uh;
3499         struct ether_vlan_header        *eh;
3500         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3501         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3502         int                             ehdrlen, ip_hlen;
3503         u16                             etype;
3504
3505         eh = mtod(mp, struct ether_vlan_header *);
3506         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3507                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3508                 etype = eh->evl_proto;
3509         } else {
3510                 ehdrlen = ETHER_HDR_LEN;
3511                 etype = eh->evl_encap_proto;
3512         }
3513
3514         /* Only handling IPv4 */
3515         if (etype != htons(ETHERTYPE_IP))
3516                 return;
3517
3518         ip = (struct ip *)(mp->m_data + ehdrlen);
3519         ip_hlen = ip->ip_hl << 2;
3520
3521         /* check if we're UDP or TCP */
3522         switch (ip->ip_p) {
3523         case IPPROTO_TCP:
3524                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3525                 /* src and dst are inverted */
3526                 common.port.dst ^= th->th_sport;
3527                 common.port.src ^= th->th_dport;
3528                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3529                 break;
3530         case IPPROTO_UDP:
3531                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3532                 /* src and dst are inverted */
3533                 common.port.dst ^= uh->uh_sport;
3534                 common.port.src ^= uh->uh_dport;
3535                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3536                 break;
3537         default:
3538                 return;
3539         }
3540
3541         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3542         if (mp->m_pkthdr.ether_vtag)
3543                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3544         else
3545                 common.flex_bytes ^= etype;
3546         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3547
3548         que = &adapter->queues[txr->me];
3549         /*
3550         ** This assumes the Rx queue and Tx
3551         ** queue are bound to the same CPU
3552         */
3553         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3554             input, common, que->msix);
3555 }
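/*
 * ixgbe_atr() is not called for every frame; the transmit path samples
 * roughly one packet per atr_sample_rate using the per-ring counter
 * initialized in ixgbe_setup_transmit_ring().  A sketch of that call
 * site (simplified, not the verbatim transmit code):
 */
#if 0
	if (txr->atr_sample) {
		++txr->atr_count;
		if (txr->atr_count >= atr_sample_rate) {
			ixgbe_atr(txr, m_head);
			txr->atr_count = 0;
		}
	}
#endif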
3556 #endif /* IXGBE_FDIR */
3557
3558 /**********************************************************************
3559  *
3560  *  Examine each tx_buffer in the used queue. If the hardware is done
3561  *  processing the packet then free associated resources. The
3562  *  tx_buffer is put back on the free queue.
3563  *
3564  **********************************************************************/
3565 static void
3566 ixgbe_txeof(struct tx_ring *txr)
3567 {
3568         struct adapter          *adapter = txr->adapter;
3569         struct ifnet            *ifp = adapter->ifp;
3570         u32                     work, processed = 0;
3571         u16                     limit = txr->process_limit;
3572         struct ixgbe_tx_buf     *buf;
3573         union ixgbe_adv_tx_desc *txd;
3574
3575         mtx_assert(&txr->tx_mtx, MA_OWNED);
3576
3577 #ifdef DEV_NETMAP
3578         if (ifp->if_capenable & IFCAP_NETMAP) {
3579                 struct netmap_adapter *na = NA(ifp);
3580                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3581                 txd = txr->tx_base;
3582                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3583                     BUS_DMASYNC_POSTREAD);
3584                 /*
3585                  * In netmap mode, all the work is done in the context
3586                  * of the client thread. Interrupt handlers only wake up
3587                  * clients, which may be sleeping on individual rings
3588                  * or on a global resource for all rings.
3589                  * To implement tx interrupt mitigation, we wake up the client
3590                  * thread roughly every half ring, even if the NIC interrupts
3591                  * more frequently. This is implemented as follows:
3592                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3593                  *   the slot that should wake up the thread (nkr_num_slots
3594                  *   means the user thread should not be woken up);
3595                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3596                  *   or the slot has the DD bit set.
3597                  *
3598                  * When the driver has separate locks, we need to
3599                  * release and re-acquire txlock to avoid deadlocks.
3600                  * XXX see if we can find a better way.
3601                  */
3602                 if (!netmap_mitigate ||
3603                     (kring->nr_kflags < kring->nkr_num_slots &&
3604                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3605                         netmap_tx_irq(ifp, txr->me |
3606                             (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
3607                 }
3608                 return;
3609         }
3610 #endif /* DEV_NETMAP */
3611
3612         if (txr->tx_avail == txr->num_desc) {
3613                 txr->queue_status = IXGBE_QUEUE_IDLE;
3614                 return;
3615         }
3616
3617         /* Get work starting point */
3618         work = txr->next_to_clean;
3619         buf = &txr->tx_buffers[work];
3620         txd = &txr->tx_base[work];
3621         work -= txr->num_desc; /* The distance to ring end */
3622         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3623             BUS_DMASYNC_POSTREAD);
3624
3625         do {
3626                 union ixgbe_adv_tx_desc *eop = buf->eop;
3627                 if (eop == NULL) /* No work */
3628                         break;
3629
3630                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3631                         break;  /* I/O not complete */
3632
3633                 if (buf->m_head) {
3634                         txr->bytes +=
3635                             buf->m_head->m_pkthdr.len;
3636                         bus_dmamap_sync(txr->txtag,
3637                             buf->map,
3638                             BUS_DMASYNC_POSTWRITE);
3639                         bus_dmamap_unload(txr->txtag,
3640                             buf->map);
3641                         m_freem(buf->m_head);
3642                         buf->m_head = NULL;
3643                         buf->map = NULL;
3644                 }
3645                 buf->eop = NULL;
3646                 ++txr->tx_avail;
3647
3648                 /* We clean the range if multi segment */
3649                 while (txd != eop) {
3650                         ++txd;
3651                         ++buf;
3652                         ++work;
3653                         /* wrap the ring? */
3654                         if (__predict_false(!work)) {
3655                                 work -= txr->num_desc;
3656                                 buf = txr->tx_buffers;
3657                                 txd = txr->tx_base;
3658                         }
3659                         if (buf->m_head) {
3660                                 txr->bytes +=
3661                                     buf->m_head->m_pkthdr.len;
3662                                 bus_dmamap_sync(txr->txtag,
3663                                     buf->map,
3664                                     BUS_DMASYNC_POSTWRITE);
3665                                 bus_dmamap_unload(txr->txtag,
3666                                     buf->map);
3667                                 m_freem(buf->m_head);
3668                                 buf->m_head = NULL;
3669                                 buf->map = NULL;
3670                         }
3671                         ++txr->tx_avail;
3672                         buf->eop = NULL;
3673
3674                 }
3675                 ++txr->packets;
3676                 ++processed;
3677                 ++ifp->if_opackets;
3678                 txr->watchdog_time = ticks;
3679
3680                 /* Try the next packet */
3681                 ++txd;
3682                 ++buf;
3683                 ++work;
3684                 /* reset with a wrap */
3685                 if (__predict_false(!work)) {
3686                         work -= txr->num_desc;
3687                         buf = txr->tx_buffers;
3688                         txd = txr->tx_base;
3689                 }
3690                 prefetch(txd);
3691         } while (__predict_true(--limit));
3692
3693         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3694             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3695
3696         work += txr->num_desc;
3697         txr->next_to_clean = work;
3698
3699         /*
3700         ** Watchdog calculation, we know there's
3701         ** work outstanding or the first return
3702         ** would have been taken, so none processed
3703         ** for too long indicates a hang.
3704         */
3705         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3706                 txr->queue_status = IXGBE_QUEUE_HUNG;
3707
3708         if (txr->tx_avail == txr->num_desc)
3709                 txr->queue_status = IXGBE_QUEUE_IDLE;
3710
3711         return;
3712 }
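/*
 * The cleanup loop above keeps 'work' biased by -num_desc so the wrap
 * test is a cheap compare against zero: the index climbs from
 * (next_to_clean - num_desc) toward 0, and hitting 0 means the ring
 * wrapped.  The equivalent unbiased stepping, for clarity:
 */
#if 0
	u32 idx = txr->next_to_clean;

	/* ...process the descriptor at idx... */
	if (++idx == txr->num_desc)
		idx = 0;		/* explicit wrap */
	txr->next_to_clean = idx;
#endif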
3713
3714 /*********************************************************************
3715  *
3716  *  Refresh mbuf buffers for RX descriptor rings
3717  *   - now keeps its own state, so discards due to resource
3718  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3719  *     it just returns, keeping its placeholder, so it can simply
3720  *     be called again later to retry.
3721  *
3722  **********************************************************************/
3723 static void
3724 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3725 {
3726         struct adapter          *adapter = rxr->adapter;
3727         bus_dma_segment_t       seg[1];
3728         struct ixgbe_rx_buf     *rxbuf;
3729         struct mbuf             *mp;
3730         int                     i, j, nsegs, error;
3731         bool                    refreshed = FALSE;
3732
3733         i = j = rxr->next_to_refresh;
3734         /* Control the loop with one beyond */
3735         if (++j == rxr->num_desc)
3736                 j = 0;
3737
3738         while (j != limit) {
3739                 rxbuf = &rxr->rx_buffers[i];
3740                 if (rxbuf->buf == NULL) {
3741                         mp = m_getjcl(M_NOWAIT, MT_DATA,
3742                             M_PKTHDR, rxr->mbuf_sz);
3743                         if (mp == NULL)
3744                                 goto update;
3745                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3746                                 m_adj(mp, ETHER_ALIGN);
3747                 } else
3748                         mp = rxbuf->buf;
3749
3750                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
3751
3752                 /* If we're dealing with an mbuf that was copied rather
3753                  * than replaced, there's no need to go through busdma.
3754                  */
3755                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
3756                         /* Get the memory mapping */
3757                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3758                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
3759                         if (error != 0) {
3760                                 printf("Refresh mbufs: payload dmamap load"
3761                                     " failure - %d\n", error);
3762                                 m_free(mp);
3763                                 rxbuf->buf = NULL;
3764                                 goto update;
3765                         }
3766                         rxbuf->buf = mp;
3767                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3768                             BUS_DMASYNC_PREREAD);
3769                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
3770                             htole64(seg[0].ds_addr);
3771                 } else {
3772                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
3773                         rxbuf->flags &= ~IXGBE_RX_COPY;
3774                 }
3775
3776                 refreshed = TRUE;
3777                 /* Next is precalculated */
3778                 i = j;
3779                 rxr->next_to_refresh = i;
3780                 if (++j == rxr->num_desc)
3781                         j = 0;
3782         }
3783 update:
3784         if (refreshed) /* Update hardware tail index */
3785                 IXGBE_WRITE_REG(&adapter->hw,
3786                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3787         return;
3788 }
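/*
 * Loop-control note: 'j' above always runs one slot ahead of 'i' (the
 * slot being refreshed), and the loop ends when j reaches the caller's
 * 'limit'.  That way next_to_refresh is always left pointing at the
 * last descriptor actually given a buffer, which is exactly the value
 * written to RDT as the new hardware tail.
 */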
3789
3790 /*********************************************************************
3791  *
3792  *  Allocate memory for rx_buffer structures. Since we use one
3793  *  rx_buffer per received packet, the maximum number of rx_buffer's
3794  *  that we'll need is equal to the number of receive descriptors
3795  *  that we've allocated.
3796  *
3797  **********************************************************************/
3798 static int
3799 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3800 {
3801         struct  adapter         *adapter = rxr->adapter;
3802         device_t                dev = adapter->dev;
3803         struct ixgbe_rx_buf     *rxbuf;
3804         int                     i, bsize, error;
3805
3806         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
3807         if (!(rxr->rx_buffers =
3808             (struct ixgbe_rx_buf *) malloc(bsize,
3809             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3810                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3811                 error = ENOMEM;
3812                 goto fail;
3813         }
3814
3815         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
3816                                    1, 0,        /* alignment, bounds */
3817                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3818                                    BUS_SPACE_MAXADDR,   /* highaddr */
3819                                    NULL, NULL,          /* filter, filterarg */
3820                                    MJUM16BYTES,         /* maxsize */
3821                                    1,                   /* nsegments */
3822                                    MJUM16BYTES,         /* maxsegsize */
3823                                    0,                   /* flags */
3824                                    NULL,                /* lockfunc */
3825                                    NULL,                /* lockfuncarg */
3826                                    &rxr->ptag))) {
3827                 device_printf(dev, "Unable to create RX DMA tag\n");
3828                 goto fail;
3829         }
3830
3831         for (i = 0; i < rxr->num_desc; i++) {
3832                 rxbuf = &rxr->rx_buffers[i];
3833                 error = bus_dmamap_create(rxr->ptag,
3834                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3835                 if (error) {
3836                         device_printf(dev, "Unable to create RX dma map\n");
3837                         goto fail;
3838                 }
3839         }
3840
3841         return (0);
3842
3843 fail:
3844         /* Frees all, but can handle partial completion */
3845         ixgbe_free_receive_structures(adapter);
3846         return (error);
3847 }
3848
3849 /*
3850 ** Used to detect a descriptor that has
3851 ** been merged by Hardware RSC.
3852 */
3853 static inline u32
3854 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3855 {
3856         return (le32toh(rx->wb.lower.lo_dword.data) &
3857             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3858 }
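/*
 * Example: a writeback descriptor that hardware RSC assembled from five
 * receive descriptors carries 5 in the RSCCNT field extracted above,
 * while a count of 0 means no coalescing took place and the descriptor
 * maps to a single buffer.
 */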
3859
3860 /*********************************************************************
3861  *
3862  *  Initialize Hardware RSC (LRO) feature on 82599
3863  *  for an RX ring, this is toggled by the LRO capability
3864  *  even though it is transparent to the stack.
3865  *
3866  *  NOTE: since this HW feature only works with IPv4, and
3867  *        our testing has shown soft LRO to be just as effective,
3868  *        I have decided to disable this by default.
3869  *
3870  **********************************************************************/
3871 static void
3872 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3873 {
3874         struct  adapter         *adapter = rxr->adapter;
3875         struct  ixgbe_hw        *hw = &adapter->hw;
3876         u32                     rscctrl, rdrxctl;
3877
3878         /* If turning LRO/RSC off we need to disable it */
3879         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
3880                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3881                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write back the cleared enable bit */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3882                 return;
3883         }
3884
3885         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3886         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3887 #ifdef DEV_NETMAP /* crcstrip is optional in netmap; must agree with HLREG0.RXCRCSTRP */
3888         if (!(adapter->ifp->if_capenable & IFCAP_NETMAP) || ix_crcstrip)
3889 #endif /* DEV_NETMAP */
3890         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3891         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3892         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3893
3894         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3895         rscctrl |= IXGBE_RSCCTL_RSCEN;
3896         /*
3897         ** Limit the total number of descriptors that
3898         ** can be combined, so it does not exceed 64K
3899         */
3900         if (rxr->mbuf_sz == MCLBYTES)
3901                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3902         else if (rxr->mbuf_sz == MJUMPAGESIZE)
3903                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3904         else if (rxr->mbuf_sz == MJUM9BYTES)
3905                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3906         else  /* Using 16K cluster */
3907                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3908
3909         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3910
3911         /* Enable TCP header recognition */
3912         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3913             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3914             IXGBE_PSRTYPE_TCPHDR));
3915
3916         /* Disable RSC for ACK packets */
3917         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3918             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3919
3920         rxr->hw_rsc = TRUE;
3921 }
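/*
 * Arithmetic behind the MAXDESC choice above: 16 merged 2K (MCLBYTES)
 * clusters cap an RSC at 32KB, 8 merged 4K pages likewise at 32KB, and
 * 4 merged 9K clusters at 36KB; every combination stays safely under
 * the 64K limit noted in the comment.
 */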
3922
3923
3924 static void
3925 ixgbe_free_receive_ring(struct rx_ring *rxr)
3926 {
3927         struct ixgbe_rx_buf       *rxbuf;
3928         int i;
3929
3930         for (i = 0; i < rxr->num_desc; i++) {
3931                 rxbuf = &rxr->rx_buffers[i];
3932                 if (rxbuf->buf != NULL) {
3933                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3934                             BUS_DMASYNC_POSTREAD);
3935                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3936                         rxbuf->buf->m_flags |= M_PKTHDR;
3937                         m_freem(rxbuf->buf);
3938                         rxbuf->buf = NULL;
3939                         rxbuf->flags = 0;
3940                 }
3941         }
3942 }
3943
3944
3945 /*********************************************************************
3946  *
3947  *  Initialize a receive ring and its buffers.
3948  *
3949  **********************************************************************/
3950 static int
3951 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3952 {
3953         struct  adapter         *adapter;
3954         struct ifnet            *ifp;
3955         device_t                dev;
3956         struct ixgbe_rx_buf     *rxbuf;
3957         bus_dma_segment_t       seg[1];
3958         struct lro_ctrl         *lro = &rxr->lro;
3959         int                     rsize, nsegs, error = 0;
3960 #ifdef DEV_NETMAP
3961         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3962         struct netmap_slot *slot;
3963 #endif /* DEV_NETMAP */
3964
3965         adapter = rxr->adapter;
3966         ifp = adapter->ifp;
3967         dev = adapter->dev;
3968
3969         /* Clear the ring contents */
3970         IXGBE_RX_LOCK(rxr);
3971 #ifdef DEV_NETMAP
3972         /* same as in ixgbe_setup_transmit_ring() */
3973         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3974 #endif /* DEV_NETMAP */
3975         rsize = roundup2(adapter->num_rx_desc *
3976             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3977         bzero((void *)rxr->rx_base, rsize);
3978         /* Cache the size */
3979         rxr->mbuf_sz = adapter->rx_mbuf_sz;
3980
3981         /* Free current RX buffer structs and their mbufs */
3982         ixgbe_free_receive_ring(rxr);
3983
3984         /* Now replenish the mbufs */
3985         for (int j = 0; j != rxr->num_desc; ++j) {
3986                 struct mbuf     *mp;
3987
3988                 rxbuf = &rxr->rx_buffers[j];
3989 #ifdef DEV_NETMAP
3990                 /*
3991                  * In netmap mode, fill the map and set the buffer
3992                  * address in the NIC ring, considering the offset
3993                  * between the netmap and NIC rings (see comment in
3994                  * ixgbe_setup_transmit_ring() ). No need to allocate
3995                  * an mbuf, so end the block with a continue;
3996                  */
3997                 if (slot) {
3998                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
3999                         uint64_t paddr;
4000                         void *addr;
4001
4002                         addr = PNMB(slot + sj, &paddr);
4003                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4004                         /* Update descriptor and the cached value */
4005                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4006                         rxbuf->addr = htole64(paddr);
4007                         continue;
4008                 }
4009 #endif /* DEV_NETMAP */
4010                 rxbuf->flags = 0; 
4011                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
4012                     M_PKTHDR, adapter->rx_mbuf_sz);
4013                 if (rxbuf->buf == NULL) {
4014                         error = ENOBUFS;
4015                         goto fail;
4016                 }
4017                 mp = rxbuf->buf;
4018                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4019                 /* Get the memory mapping */
4020                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4021                     rxbuf->pmap, mp, seg,
4022                     &nsegs, BUS_DMA_NOWAIT);
4023                 if (error != 0)
4024                         goto fail;
4025                 bus_dmamap_sync(rxr->ptag,
4026                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4027                 /* Update the descriptor and the cached value */
4028                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
4029                 rxbuf->addr = htole64(seg[0].ds_addr);
4030         }
4031
4032
4033         /* Setup our descriptor indices */
4034         rxr->next_to_check = 0;
4035         rxr->next_to_refresh = 0;
4036         rxr->lro_enabled = FALSE;
4037         rxr->rx_copies = 0;
4038         rxr->rx_bytes = 0;
4039         rxr->discard = FALSE;
4040         rxr->vtag_strip = FALSE;
4041
4042         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4043             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4044
4045         /*
4046         ** Now set up the LRO interface:
4047         */
4048         if (ixgbe_rsc_enable)
4049                 ixgbe_setup_hw_rsc(rxr);
4050         else if (ifp->if_capenable & IFCAP_LRO) {
4051                 int err = tcp_lro_init(lro);
4052                 if (err) {
4053                         device_printf(dev, "LRO Initialization failed!\n");
4054                         goto fail;
4055                 }
4056                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4057                 rxr->lro_enabled = TRUE;
4058                 lro->ifp = adapter->ifp;
4059         }
4060
4061         IXGBE_RX_UNLOCK(rxr);
4062         return (0);
4063
4064 fail:
4065         ixgbe_free_receive_ring(rxr);
4066         IXGBE_RX_UNLOCK(rxr);
4067         return (error);
4068 }
4069
4070 /*********************************************************************
4071  *
4072  *  Initialize all receive rings.
4073  *
4074  **********************************************************************/
4075 static int
4076 ixgbe_setup_receive_structures(struct adapter *adapter)
4077 {
4078         struct rx_ring *rxr = adapter->rx_rings;
4079         int j;
4080
4081         for (j = 0; j < adapter->num_queues; j++, rxr++)
4082                 if (ixgbe_setup_receive_ring(rxr))
4083                         goto fail;
4084
4085         return (0);
4086 fail:
4087         /*
4088          * Free the RX buffers allocated so far; we only handle the
4089          * rings that completed, since the failing ring will have
4090          * cleaned up after itself. Ring 'j' failed, so it is the terminus.
4091          */
4092         for (int i = 0; i < j; ++i) {
4093                 rxr = &adapter->rx_rings[i];
4094                 ixgbe_free_receive_ring(rxr);
4095         }
4096
4097         return (ENOBUFS);
4098 }
4099
4100 /*********************************************************************
4101  *
4102  *  Setup receive registers and features.
4103  *
4104  **********************************************************************/
4105 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4106
4107 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4108         
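/*
 * Example of the round-up-and-shift below: SRRCTL expresses the packet
 * buffer size in 1KB units, so a 2048-byte receive cluster yields
 * bufsz = (2048 + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT = 2,
 * i.e. SRRCTL.BSIZEPKT = 2 for a 2KB buffer.
 */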
4109 static void
4110 ixgbe_initialize_receive_units(struct adapter *adapter)
4111 {
4112         struct  rx_ring *rxr = adapter->rx_rings;
4113         struct ixgbe_hw *hw = &adapter->hw;
4114         struct ifnet   *ifp = adapter->ifp;
4115         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4116         u32             reta, mrqc = 0, hlreg, random[10];
4117
4118
4119         /*
4120          * Make sure receives are disabled while
4121          * setting up the descriptor ring
4122          */
4123         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4124         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4125             rxctrl & ~IXGBE_RXCTRL_RXEN);
4126
4127         /* Enable broadcasts */
4128         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4129         fctrl |= IXGBE_FCTRL_BAM;
4130         fctrl |= IXGBE_FCTRL_DPF;
4131         fctrl |= IXGBE_FCTRL_PMCF;
4132         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4133
4134         /* Set for Jumbo Frames? */
4135         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4136         if (ifp->if_mtu > ETHERMTU)
4137                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4138         else
4139                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4140 #ifdef DEV_NETMAP
4141         /* crcstrip is conditional in netmap; RDRXCTL is kept in agreement */
4142         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4143                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4144         else
4145                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4146 #endif /* DEV_NETMAP */
4147         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4148
4149         bufsz = (adapter->rx_mbuf_sz +
4150             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4151
4152         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4153                 u64 rdba = rxr->rxdma.dma_paddr;
4154
4155                 /* Setup the Base and Length of the Rx Descriptor Ring */
4156                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4157                                (rdba & 0x00000000ffffffffULL));
4158                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4159                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4160                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4161
4162                 /* Set up the SRRCTL register */
4163                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4164                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4165                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4166                 srrctl |= bufsz;
4167                 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4168                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4169
4170                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4171                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4172                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4173
4174                 /* Set the processing limit */
4175                 rxr->process_limit = ixgbe_rx_process_limit;
4176         }
4177
4178         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4179                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4180                               IXGBE_PSRTYPE_UDPHDR |
4181                               IXGBE_PSRTYPE_IPV4HDR |
4182                               IXGBE_PSRTYPE_IPV6HDR;
4183                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4184         }
4185
4186         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4187
4188         /* Setup RSS */
4189         if (adapter->num_queues > 1) {
4190                 int i, j;
4191                 reta = 0;
4192
4193                 /* set up random bits */
4194                 arc4rand(&random, sizeof(random), 0);
4195
4196                 /* Set up the redirection table */
4197                 for (i = 0, j = 0; i < 128; i++, j++) {
4198                         if (j == adapter->num_queues) j = 0;
4199                         reta = (reta << 8) | (j * 0x11);
4200                         if ((i & 3) == 3)
4201                                 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4202                 }
4203
4204                 /* Now fill our hash function seeds */
4205                 for (int i = 0; i < 10; i++)
4206                         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4207
4208                 /* Perform hash on these packet types */
4209                 mrqc = IXGBE_MRQC_RSSEN
4210                      | IXGBE_MRQC_RSS_FIELD_IPV4
4211                      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4212                      | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4213                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4214                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4215                      | IXGBE_MRQC_RSS_FIELD_IPV6
4216                      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4217                      | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4218                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4219                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4220
4221                 /* RSS and RX IPP Checksum are mutually exclusive */
4222                 rxcsum |= IXGBE_RXCSUM_PCSD;
4223         }
4224
4225         if (ifp->if_capenable & IFCAP_RXCSUM)
4226                 rxcsum |= IXGBE_RXCSUM_PCSD;
4227
4228         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4229                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4230
4231         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4232
4233         return;
4234 }
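
/*
 * Editor's sketch (illustrative, not part of the driver): two pieces of
 * arithmetic from the receive setup above.  The SRRCTL BSIZEPKT field is
 * expressed in 1 KB units -- assuming IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10
 * and BSIZEPKT_ROUNDUP is ((1 << 10) - 1) -- and the 128-entry RETA is
 * written four 8-bit entries per 32-bit register.
 */
#if 0
static void
example_rx_setup_math(void)
{
	u32 bufsz, reta = 0;

	/* A 2048-byte cluster becomes 2 (1 KB units) in BSIZEPKT */
	bufsz = (2048 + ((1 << 10) - 1)) >> 10;		/* == 2 */

	/* Pack the RETA: every 4th entry completes one register */
	for (int i = 0, j = 0; i < 128; i++, j++) {
		if (j == 4)		/* assume num_queues == 4 */
			j = 0;
		reta = (reta << 8) | (j * 0x11);
		if ((i & 3) == 3)
			;		/* driver writes IXGBE_RETA(i >> 2) here */
	}
	(void)bufsz;
}
#endif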
4235
4236 /*********************************************************************
4237  *
4238  *  Free all receive rings.
4239  *
4240  **********************************************************************/
4241 static void
4242 ixgbe_free_receive_structures(struct adapter *adapter)
4243 {
4244         struct rx_ring *rxr = adapter->rx_rings;
4245
4246         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4247
4248         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4249                 struct lro_ctrl         *lro = &rxr->lro;
4250                 ixgbe_free_receive_buffers(rxr);
4251                 /* Free LRO memory */
4252                 tcp_lro_free(lro);
4253                 /* Free the ring memory as well */
4254                 ixgbe_dma_free(adapter, &rxr->rxdma);
4255         }
4256
4257         free(adapter->rx_rings, M_DEVBUF);
4258 }
4259
4260
4261 /*********************************************************************
4262  *
4263  *  Free receive ring data structures
4264  *
4265  **********************************************************************/
4266 static void
4267 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4268 {
4269         struct adapter          *adapter = rxr->adapter;
4270         struct ixgbe_rx_buf     *rxbuf;
4271
4272         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4273
4274         /* Cleanup any existing buffers */
4275         if (rxr->rx_buffers != NULL) {
4276                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4277                         rxbuf = &rxr->rx_buffers[i];
4278                         if (rxbuf->buf != NULL) {
4279                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4280                                     BUS_DMASYNC_POSTREAD);
4281                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4282                                 rxbuf->buf->m_flags |= M_PKTHDR;
4283                                 m_freem(rxbuf->buf);
4284                         }
4285                         rxbuf->buf = NULL;
4286                         if (rxbuf->pmap != NULL) {
4287                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4288                                 rxbuf->pmap = NULL;
4289                         }
4290                 }
4291                 if (rxr->rx_buffers != NULL) {
4292                         free(rxr->rx_buffers, M_DEVBUF);
4293                         rxr->rx_buffers = NULL;
4294                 }
4295         }
4296
4297         if (rxr->ptag != NULL) {
4298                 bus_dma_tag_destroy(rxr->ptag);
4299                 rxr->ptag = NULL;
4300         }
4301
4302         return;
4303 }
4304
4305 static __inline void
4306 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4307 {
4308                  
4309         /*
4310          * At the moment, LRO is only for IP/TCP packets, and the TCP
4311          * checksum of the packet must have been computed by hardware.
4312          * Also, it must not have a VLAN tag in its ethernet header.
4313          * In case of IPv6 we do not yet support ext. hdrs.
4314          */
4314         if (rxr->lro_enabled &&
4315             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4316             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4317             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4318             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4319             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4320             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4321             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4322             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4323                 /*
4324                  * Send to the stack if:
4325                  **  - LRO not enabled, or
4326                  **  - no LRO resources, or
4327                  **  - lro enqueue fails
4328                  */
4329                 if (rxr->lro.lro_cnt != 0)
4330                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4331                                 return;
4332         }
4333         IXGBE_RX_UNLOCK(rxr);
4334         (*ifp->if_input)(ifp, m);
4335         IXGBE_RX_LOCK(rxr);
4336 }
4337
4338 static __inline void
4339 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4340 {
4341         struct ixgbe_rx_buf     *rbuf;
4342
4343         rbuf = &rxr->rx_buffers[i];
4344
4345         if (rbuf->fmp != NULL) {/* Partial chain ? */
4346                 rbuf->fmp->m_flags |= M_PKTHDR;
4347                 m_freem(rbuf->fmp);
4348                 rbuf->fmp = NULL;
4349         }
4350
4351         /*
4352         ** With advanced descriptors the writeback
4353         ** clobbers the buffer addrs, so it's easier
4354         ** to just free the existing mbufs and take
4355         ** the normal refresh path to get new buffers
4356         ** and mapping.
4357         */
4358         if (rbuf->buf) {
4359                 m_free(rbuf->buf);
4360                 rbuf->buf = NULL;
4361         }
4362
4363         rbuf->flags = 0;
4364  
4365         return;
4366 }
4367
4368
4369 /*********************************************************************
4370  *
4371  *  This routine executes in interrupt context. It replenishes
4372  *  the mbufs in the descriptor ring and sends data which has been
4373  *  dma'ed into host memory to the upper layer.
4374  *
4375  *  We loop at most count times if count is > 0, or until done if
4376  *  count < 0.
4377  *
4378  *  Return TRUE for more work, FALSE for all clean.
4379  *********************************************************************/
4380 static bool
4381 ixgbe_rxeof(struct ix_queue *que)
4382 {
4383         struct adapter          *adapter = que->adapter;
4384         struct rx_ring          *rxr = que->rxr;
4385         struct ifnet            *ifp = adapter->ifp;
4386         struct lro_ctrl         *lro = &rxr->lro;
4387         struct lro_entry        *queued;
4388         int                     i, nextp, processed = 0;
4389         u32                     staterr = 0;
4390         u16                     count = rxr->process_limit;
4391         union ixgbe_adv_rx_desc *cur;
4392         struct ixgbe_rx_buf     *rbuf, *nbuf;
4393
4394         IXGBE_RX_LOCK(rxr);
4395
4396 #ifdef DEV_NETMAP
4397         /* Same as the txeof routine: wakeup clients on intr. */
4398         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4399                 return (FALSE);
4400 #endif /* DEV_NETMAP */
4401
4402         for (i = rxr->next_to_check; count != 0;) {
4403                 struct mbuf     *sendmp, *mp;
4404                 u32             rsc, ptype;
4405                 u16             len;
4406                 u16             vtag = 0;
4407                 bool            eop;
4408  
4409                 /* Sync the ring. */
4410                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4411                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4412
4413                 cur = &rxr->rx_base[i];
4414                 staterr = le32toh(cur->wb.upper.status_error);
4415
4416                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4417                         break;
4418                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4419                         break;
4420
4421                 count--;
4422                 sendmp = NULL;
4423                 nbuf = NULL;
4424                 rsc = 0;
4425                 cur->wb.upper.status_error = 0;
4426                 rbuf = &rxr->rx_buffers[i];
4427                 mp = rbuf->buf;
4428
4429                 len = le16toh(cur->wb.upper.length);
4430                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4431                     IXGBE_RXDADV_PKTTYPE_MASK;
4432                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4433
4434                 /* Make sure bad packets are discarded */
4435                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4436                     (rxr->discard)) {
4437                         rxr->rx_discarded++;
4438                         if (eop)
4439                                 rxr->discard = FALSE;
4440                         else
4441                                 rxr->discard = TRUE;
4442                         ixgbe_rx_discard(rxr, i);
4443                         goto next_desc;
4444                 }
4445
4446                 /*
4447                 ** On the 82599, which supports a hardware
4448                 ** LRO (called HW RSC), packets need
4449                 ** not be fragmented across sequential
4450                 ** descriptors; rather, the next descriptor
4451                 ** is indicated in bits of the descriptor.
4452                 ** This also means that we might process
4453                 ** more than one packet at a time, something
4454                 ** that has never been true before; it
4455                 ** required eliminating global chain pointers
4456                 ** in favor of what we are doing here.  -jfv
4457                 */
4458                 if (!eop) {
4459                         /*
4460                         ** Figure out the next descriptor
4461                         ** of this frame.
4462                         */
4463                         if (rxr->hw_rsc == TRUE) {
4464                                 rsc = ixgbe_rsc_count(cur);
4465                                 rxr->rsc_num += (rsc - 1);
4466                         }
4467                         if (rsc) { /* Get hardware index */
4468                                 nextp = ((staterr &
4469                                     IXGBE_RXDADV_NEXTP_MASK) >>
4470                                     IXGBE_RXDADV_NEXTP_SHIFT);
4471                         } else { /* Just sequential */
4472                                 nextp = i + 1;
4473                                 if (nextp == adapter->num_rx_desc)
4474                                         nextp = 0;
4475                         }
4476                         nbuf = &rxr->rx_buffers[nextp];
4477                         prefetch(nbuf);
4478                 }
4479                 /*
4480                 ** Rather than using the fmp/lmp global pointers
4481                 ** we now keep the head of a packet chain in the
4482                 ** buffer struct and pass this along from one
4483                 ** descriptor to the next, until we get EOP.
4484                 */
4485                 mp->m_len = len;
4486                 /*
4487                 ** See if there is a stored head
4488                 ** from a previous descriptor.
4489                 */
4490                 sendmp = rbuf->fmp;
4491                 if (sendmp != NULL) {  /* secondary frag */
4492                         rbuf->buf = rbuf->fmp = NULL;
4493                         mp->m_flags &= ~M_PKTHDR;
4494                         sendmp->m_pkthdr.len += mp->m_len;
4495                 } else {
4496                         /*
4497                          * Optimize.  This might be a small packet,
4498                          * maybe just a TCP ACK.  Do a fast copy that
4499                          * is cache aligned into a new mbuf, and
4500                          * leave the old mbuf+cluster for re-use.
4501                          */
4502                         if (eop && len <= IXGBE_RX_COPY_LEN) {
4503                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4504                                 if (sendmp != NULL) {
4505                                         sendmp->m_data +=
4506                                             IXGBE_RX_COPY_ALIGN;
4507                                         ixgbe_bcopy(mp->m_data,
4508                                             sendmp->m_data, len);
4509                                         sendmp->m_len = len;
4510                                         rxr->rx_copies++;
4511                                         rbuf->flags |= IXGBE_RX_COPY;
4512                                 }
4513                         }
4514                         if (sendmp == NULL) {
4515                                 rbuf->buf = rbuf->fmp = NULL;
4516                                 sendmp = mp;
4517                         }
4518
4519                         /* first desc of a non-ps chain */
4520                         sendmp->m_flags |= M_PKTHDR;
4521                         sendmp->m_pkthdr.len = mp->m_len;
4522                 }
4523                 ++processed;
4524
4525                 /* Pass the head pointer on */
4526                 if (eop == 0) {
4527                         nbuf->fmp = sendmp;
4528                         sendmp = NULL;
4529                         mp->m_next = nbuf->buf;
4530                 } else { /* Sending this frame */
4531                         sendmp->m_pkthdr.rcvif = ifp;
4532                         ifp->if_ipackets++;
4533                         rxr->rx_packets++;
4534                         /* capture data for AIM */
4535                         rxr->bytes += sendmp->m_pkthdr.len;
4536                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4537                         /* Process vlan info */
4538                         if ((rxr->vtag_strip) &&
4539                             (staterr & IXGBE_RXD_STAT_VP))
4540                                 vtag = le16toh(cur->wb.upper.vlan);
4541                         if (vtag) {
4542                                 sendmp->m_pkthdr.ether_vtag = vtag;
4543                                 sendmp->m_flags |= M_VLANTAG;
4544                         }
4545                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4546                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4547 #if __FreeBSD_version >= 800000
4548                         sendmp->m_pkthdr.flowid = que->msix;
4549                         sendmp->m_flags |= M_FLOWID;
4550 #endif
4551                 }
4552 next_desc:
4553                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4554                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4555
4556                 /* Advance our pointers to the next descriptor. */
4557                 if (++i == rxr->num_desc)
4558                         i = 0;
4559
4560                 /* Now send to the stack or do LRO */
4561                 if (sendmp != NULL) {
4562                         rxr->next_to_check = i;
4563                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4564                         i = rxr->next_to_check;
4565                 }
4566
4567                 /* Every 8 descriptors we refresh the mbufs */
4568                 if (processed == 8) {
4569                         ixgbe_refresh_mbufs(rxr, i);
4570                         processed = 0;
4571                 }
4572         }
4573
4574         /* Refresh any remaining buf structs */
4575         if (ixgbe_rx_unrefreshed(rxr))
4576                 ixgbe_refresh_mbufs(rxr, i);
4577
4578         rxr->next_to_check = i;
4579
4580         /*
4581          * Flush any outstanding LRO work
4582          */
4583         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4584                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4585                 tcp_lro_flush(lro, queued);
4586         }
4587
4588         IXGBE_RX_UNLOCK(rxr);
4589
4590         /*
4591         ** Still have cleaning to do?
4592         */
4593         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
4594                 return (TRUE);
4595         else
4596                 return (FALSE);
4597 }
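
/*
 * Editor's note (illustrative): two idioms from ixgbe_rxeof() above.
 * Descriptor indices wrap modulo the ring size, and mbuf refresh is
 * batched -- every 8 processed descriptors -- to amortize the cost of
 * updating the tail register.  A minimal sketch of the index wrap:
 */
#if 0
static int
example_ring_advance(int i, int num_desc)
{
	return ((i + 1 == num_desc) ? 0 : i + 1);
}
#endif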
4598
4599
4600 /*********************************************************************
4601  *
4602  *  Verify that the hardware indicated that the checksum is valid.
4603  *  Inform the stack about the status of the checksum so that the
4604  *  stack doesn't spend time verifying it again.
4605  *
4606  *********************************************************************/
4607 static void
4608 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4609 {
4610         u16     status = (u16) staterr;
4611         u8      errors = (u8) (staterr >> 24);
4612         bool    sctp = FALSE;
4613
4614         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4615             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4616                 sctp = TRUE;
4617
4618         if (status & IXGBE_RXD_STAT_IPCS) {
4619                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4620                         /* IP Checksum Good */
4621                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4622                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4623
4624                 } else
4625                         mp->m_pkthdr.csum_flags = 0;
4626         }
4627         if (status & IXGBE_RXD_STAT_L4CS) {
4628                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4629 #if __FreeBSD_version >= 800000
4630                 if (sctp)
4631                         type = CSUM_SCTP_VALID;
4632 #endif
4633                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4634                         mp->m_pkthdr.csum_flags |= type;
4635                         if (!sctp)
4636                                 mp->m_pkthdr.csum_data = htons(0xffff);
4637                 } 
4638         }
4639         return;
4640 }
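
/*
 * Editor's sketch (illustrative): the staterr split above follows the
 * advanced RX descriptor writeback layout -- status flags in the low
 * 16 bits, error flags in bits 31:24 -- hence the (u16) cast and the
 * ">> 24" shift.  The bit values used here are assumptions for
 * illustration only.
 */
#if 0
static void
example_staterr_split(void)
{
	u32 staterr = 0x00000061;	  /* assume DD|L4CS|IPCS set */
	u16 status = (u16)staterr;	  /* low word: status bits  */
	u8  errors = (u8)(staterr >> 24); /* byte 3: error bits     */

	/* errors == 0 here, so both checksums would be reported valid */
	(void)status; (void)errors;
}
#endif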
4641
4642
4643 /*
4644 ** This routine is run via a vlan config EVENT;
4645 ** it enables us to use the HW Filter table since
4646 ** we can get the vlan id. This just creates the
4647 ** entry in the soft version of the VFTA; init will
4648 ** repopulate the real table.
4649 */
4650 static void
4651 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4652 {
4653         struct adapter  *adapter = ifp->if_softc;
4654         u16             index, bit;
4655
4656         if (ifp->if_softc !=  arg)   /* Not our event */
4657                 return;
4658
4659         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4660                 return;
4661
4662         IXGBE_CORE_LOCK(adapter);
4663         index = (vtag >> 5) & 0x7F;
4664         bit = vtag & 0x1F;
4665         adapter->shadow_vfta[index] |= (1 << bit);
4666         ++adapter->num_vlans;
4667         ixgbe_init_locked(adapter);
4668         IXGBE_CORE_UNLOCK(adapter);
4669 }
4670
4671 /*
4672 ** This routine is run via a vlan
4673 ** unconfig EVENT; it removes our entry
4674 ** from the soft vfta.
4675 */
4676 static void
4677 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4678 {
4679         struct adapter  *adapter = ifp->if_softc;
4680         u16             index, bit;
4681
4682         if (ifp->if_softc !=  arg)
4683                 return;
4684
4685         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4686                 return;
4687
4688         IXGBE_CORE_LOCK(adapter);
4689         index = (vtag >> 5) & 0x7F;
4690         bit = vtag & 0x1F;
4691         adapter->shadow_vfta[index] &= ~(1 << bit);
4692         --adapter->num_vlans;
4693         /* Re-init to load the changes */
4694         ixgbe_init_locked(adapter);
4695         IXGBE_CORE_UNLOCK(adapter);
4696 }
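
/*
 * Editor's sketch (illustrative): the shadow-VFTA math used by the two
 * event handlers above maps a 12-bit VLAN ID into a table of 128 32-bit
 * words -- bits 11:5 pick the word, bits 4:0 pick the bit within it.
 */
#if 0
static void
example_vfta_index(void)
{
	u16 vtag = 100;			/* example VLAN ID */
	u16 index = (vtag >> 5) & 0x7F;	/* 100 >> 5 == 3: word 3 */
	u16 bit = vtag & 0x1F;		/* 100 & 31 == 4: bit 4  */
	u32 word = (1 << bit);		/* 0x00000010 ORed into VFTA[3] */

	(void)index; (void)word;
}
#endif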
4697
4698 static void
4699 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4700 {
4701         struct ifnet    *ifp = adapter->ifp;
4702         struct ixgbe_hw *hw = &adapter->hw;
4703         struct rx_ring  *rxr;
4704         u32             ctrl;
4705
4706
4707         /*
4708         ** We get here through init_locked, meaning
4709         ** a soft reset; this has already cleared
4710         ** the VFTA and other state, so if no
4711         ** vlans have been registered, do nothing.
4712         */
4713         if (adapter->num_vlans == 0)
4714                 return;
4715
4716         /*
4717         ** A soft reset zeroes out the VFTA, so
4718         ** we need to repopulate it now.
4719         */
4720         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4721                 if (adapter->shadow_vfta[i] != 0)
4722                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4723                             adapter->shadow_vfta[i]);
4724
4725         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4726         /* Enable the filter table if the capability is set */
4727         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4728                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4729                 ctrl |= IXGBE_VLNCTRL_VFE;
4730         }
4731         if (hw->mac.type == ixgbe_mac_82598EB)
4732                 ctrl |= IXGBE_VLNCTRL_VME;
4733         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4734
4735         /* Setup the queues for vlans */
4736         for (int i = 0; i < adapter->num_queues; i++) {
4737                 rxr = &adapter->rx_rings[i];
4738                 /* On 82599 the VLAN enable is per-queue in RXDCTL */
4739                 if (hw->mac.type != ixgbe_mac_82598EB) {
4740                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4741                         ctrl |= IXGBE_RXDCTL_VME;
4742                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4743                 }
4744                 rxr->vtag_strip = TRUE;
4745         }
4746 }
4747
4748 static void
4749 ixgbe_enable_intr(struct adapter *adapter)
4750 {
4751         struct ixgbe_hw *hw = &adapter->hw;
4752         struct ix_queue *que = adapter->queues;
4753         u32             mask, fwsm;
4754
4755         mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4756         /* Enable Fan Failure detection */
4757         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4758                 mask |= IXGBE_EIMS_GPI_SDP1;
4759
4760         switch (adapter->hw.mac.type) {
4761                 case ixgbe_mac_82599EB:
4762                         mask |= IXGBE_EIMS_ECC;
4763                         mask |= IXGBE_EIMS_GPI_SDP0;
4764                         mask |= IXGBE_EIMS_GPI_SDP1;
4765                         mask |= IXGBE_EIMS_GPI_SDP2;
4766 #ifdef IXGBE_FDIR
4767                         mask |= IXGBE_EIMS_FLOW_DIR;
4768 #endif
4769                         break;
4770                 case ixgbe_mac_X540:
4771                         mask |= IXGBE_EIMS_ECC;
4772                         /* Detect if Thermal Sensor is enabled */
4773                         fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
4774                         if (fwsm & IXGBE_FWSM_TS_ENABLED)
4775                                 mask |= IXGBE_EIMS_TS;
4776 #ifdef IXGBE_FDIR
4777                         mask |= IXGBE_EIMS_FLOW_DIR;
4778 #endif
4779                 /* falls through */
4780                 default:
4781                         break;
4782         }
4783
4784         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4785
4786         /* With RSS we use auto clear */
4787         if (adapter->msix_mem) {
4788                 mask = IXGBE_EIMS_ENABLE_MASK;
4789                 /* Don't autoclear Link */
4790                 mask &= ~IXGBE_EIMS_OTHER;
4791                 mask &= ~IXGBE_EIMS_LSC;
4792                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4793         }
4794
4795         /*
4796         ** Now enable all queues; this is done separately to
4797         ** allow for handling the extended (beyond 32) MSIX
4798         ** vectors that can be used by the 82599.
4799         */
4800         for (int i = 0; i < adapter->num_queues; i++, que++)
4801                 ixgbe_enable_queue(adapter, que->msix);
4802
4803         IXGBE_WRITE_FLUSH(hw);
4804
4805         return;
4806 }
4807
4808 static void
4809 ixgbe_disable_intr(struct adapter *adapter)
4810 {
4811         if (adapter->msix_mem)
4812                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4813         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4814                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4815         } else {
4816                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4817                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4818                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4819         }
4820         IXGBE_WRITE_FLUSH(&adapter->hw);
4821         return;
4822 }
4823
4824 u16
4825 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4826 {
4827         u16 value;
4828
4829         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4830             reg, 2);
4831
4832         return (value);
4833 }
4834
4835 void
4836 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4837 {
4838         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4839             reg, value, 2);
4840
4841         return;
4842 }
4843
4844 /*
4845 ** Get the width and transaction speed of
4846 ** the slot this adapter is plugged into.
4847 */
4848 static void
4849 ixgbe_get_slot_info(struct ixgbe_hw *hw)
4850 {
4851         device_t                dev = ((struct ixgbe_osdep *)hw->back)->dev;
4852         struct ixgbe_mac_info   *mac = &hw->mac;
4853         u16                     link;
4854         u32                     offset;
4855
4856         /* For most devices simply call the shared code routine */
4857         if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
4858                 ixgbe_get_bus_info(hw);
4859                 goto display;
4860         }
4861
4862         /*
4863         ** For the Quad port adapter we need to parse back
4864         ** up the PCI tree to find the speed of the expansion
4865         ** slot into which this adapter is plugged. A bit more work.
4866         */
4867         dev = device_get_parent(device_get_parent(dev));
4868 #ifdef IXGBE_DEBUG
4869         device_printf(dev, "parent pcib = %x,%x,%x\n",
4870             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4871 #endif
4872         dev = device_get_parent(device_get_parent(dev));
4873 #ifdef IXGBE_DEBUG
4874         device_printf(dev, "slot pcib = %x,%x,%x\n",
4875             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4876 #endif
4877         /* Now get the PCI Express Capabilities offset */
4878         pci_find_cap(dev, PCIY_EXPRESS, &offset);
4879         /* ...and read the Link Status Register */
4880         link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
4881         switch (link & IXGBE_PCI_LINK_WIDTH) {
4882         case IXGBE_PCI_LINK_WIDTH_1:
4883                 hw->bus.width = ixgbe_bus_width_pcie_x1;
4884                 break;
4885         case IXGBE_PCI_LINK_WIDTH_2:
4886                 hw->bus.width = ixgbe_bus_width_pcie_x2;
4887                 break;
4888         case IXGBE_PCI_LINK_WIDTH_4:
4889                 hw->bus.width = ixgbe_bus_width_pcie_x4;
4890                 break;
4891         case IXGBE_PCI_LINK_WIDTH_8:
4892                 hw->bus.width = ixgbe_bus_width_pcie_x8;
4893                 break;
4894         default:
4895                 hw->bus.width = ixgbe_bus_width_unknown;
4896                 break;
4897         }
4898
4899         switch (link & IXGBE_PCI_LINK_SPEED) {
4900         case IXGBE_PCI_LINK_SPEED_2500:
4901                 hw->bus.speed = ixgbe_bus_speed_2500;
4902                 break;
4903         case IXGBE_PCI_LINK_SPEED_5000:
4904                 hw->bus.speed = ixgbe_bus_speed_5000;
4905                 break;
4906         case IXGBE_PCI_LINK_SPEED_8000:
4907                 hw->bus.speed = ixgbe_bus_speed_8000;
4908                 break;
4909         default:
4910                 hw->bus.speed = ixgbe_bus_speed_unknown;
4911                 break;
4912         }
4913
4914         mac->ops.set_lan_id(hw);
4915
4916 display:
4917         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
4918             ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
4919             (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
4920             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
4921             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
4922             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
4923             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
4924             ("Unknown"));
4925
4926         if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4927             ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
4928             (hw->bus.speed == ixgbe_bus_speed_2500))) {
4929                 device_printf(dev, "PCI-Express bandwidth available"
4930                     " for this card\n     is not sufficient for"
4931                     " optimal performance.\n");
4932                 device_printf(dev, "For optimal performance a x8 "
4933                     "PCIE, or x4 PCIE Gen2 slot is required.\n");
4934         }
4935         if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4936             ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
4937             (hw->bus.speed < ixgbe_bus_speed_8000))) {
4938                 device_printf(dev, "PCI-Express bandwidth available"
4939                     " for this card\n     is not sufficient for"
4940                     " optimal performance.\n");
4941                 device_printf(dev, "For optimal performance a x8 "
4942                     "PCIE Gen3 slot is required.\n");
4943         }
4944
4945         return;
4946 }
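
/*
 * Editor's note (illustrative): the decode above follows the standard
 * PCI Express Link Status Register layout, where -- per the PCIe spec,
 * an assumption not stated in this file -- bits 3:0 carry the current
 * link speed (1 = 2.5, 2 = 5.0, 3 = 8.0 GT/s) and bits 9:4 the
 * negotiated link width.
 */
#if 0
static void
example_lnksta_decode(u16 link)
{
	u16 speed = link & 0x000F;	/* 1/2/3 -> 2.5/5.0/8.0 GT/s  */
	u16 width = (link >> 4) & 0x3F;	/* lane count: x1, x4, x8, ... */

	(void)speed; (void)width;
}
#endif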
4947
4948
4949 /*
4950 ** Setup the correct IVAR register for a particular MSIX interrupt
4951 **   (yes this is all very magic and confusing :)
4952 **  - entry is the register array entry
4953 **  - vector is the MSIX vector for this queue
4954 **  - type is RX/TX/MISC
4955 */
4956 static void
4957 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4958 {
4959         struct ixgbe_hw *hw = &adapter->hw;
4960         u32 ivar, index;
4961
4962         vector |= IXGBE_IVAR_ALLOC_VAL;
4963
4964         switch (hw->mac.type) {
4965
4966         case ixgbe_mac_82598EB:
4967                 if (type == -1)
4968                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4969                 else
4970                         entry += (type * 64);
4971                 index = (entry >> 2) & 0x1F;
4972                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4973                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4974                 ivar |= (vector << (8 * (entry & 0x3)));
4975                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
4976                 break;
4977
4978         case ixgbe_mac_82599EB:
4979         case ixgbe_mac_X540:
4980                 if (type == -1) { /* MISC IVAR */
4981                         index = (entry & 1) * 8;
4982                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4983                         ivar &= ~(0xFF << index);
4984                         ivar |= (vector << index);
4985                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4986                 } else {        /* RX/TX IVARS */
4987                         index = (16 * (entry & 1)) + (8 * type);
4988                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4989                         ivar &= ~(0xFF << index);
4990                         ivar |= (vector << index);
4991                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4992                 }
4993                 break;

4994         default:
4995                 break;
4996         }
4997 }
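
/*
 * Editor's sketch (illustrative): on the 82598 each 32-bit IVAR register
 * holds four 8-bit vector entries, so (entry >> 2) selects the register
 * and 8 * (entry & 3) the byte lane; on 82599/X540 each register holds
 * two RX and two TX entries, hence 16 * (entry & 1) + 8 * type.  The
 * read-modify-write of one byte lane looks like this:
 */
#if 0
static u32
example_ivar_insert(u32 ivar, u8 entry, u8 vector)
{
	u32 shift = 8 * (entry & 0x3);	/* byte lane within the register */

	ivar &= ~((u32)0xFF << shift);	/* clear the old vector entry */
	ivar |= ((u32)vector << shift);	/* install the new one */
	return (ivar);			/* caller writes IVAR(entry >> 2) */
}
#endif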
4998
4999 static void
5000 ixgbe_configure_ivars(struct adapter *adapter)
5001 {
5002         struct  ix_queue *que = adapter->queues;
5003         u32 newitr;
5004
5005         if (ixgbe_max_interrupt_rate > 0)
5006                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5007         else
5008                 newitr = 0;
5009
5010         for (int i = 0; i < adapter->num_queues; i++, que++) {
5011                 /* First the RX queue entry */
5012                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5013                 /* ... and the TX */
5014                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5015                 /* Set an Initial EITR value */
5016                 IXGBE_WRITE_REG(&adapter->hw,
5017                     IXGBE_EITR(que->msix), newitr);
5018         }
5019
5020         /* For the Link interrupt */
5021         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5022 }
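
/*
 * Editor's note (illustrative arithmetic): the initial EITR value above
 * is (4000000 / rate) & 0x0FF8, i.e. only bits 11:3 of the interval
 * field are used.  The interrupt_rate sysctl handler later inverts this
 * with rate = 500000 / ((reg & 0x0FF8) >> 3).
 */
#if 0
static u32
example_eitr_from_rate(u32 rate)
{
	/* e.g. rate == 8000 irqs/sec -> (4000000 / 8000) & 0x0FF8 == 0x1F0 */
	return ((4000000 / rate) & 0x0FF8);
}
#endif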
5023
5024 /*
5025 ** ixgbe_sfp_probe - called in the local timer to
5026 ** determine whether a port has had optics inserted.
5027 */  
5028 static bool ixgbe_sfp_probe(struct adapter *adapter)
5029 {
5030         struct ixgbe_hw *hw = &adapter->hw;
5031         device_t        dev = adapter->dev;
5032         bool            result = FALSE;
5033
5034         if ((hw->phy.type == ixgbe_phy_nl) &&
5035             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5036                 s32 ret = hw->phy.ops.identify_sfp(hw);
5037                 if (ret)
5038                         goto out;
5039                 ret = hw->phy.ops.reset(hw);
5040                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5041                         device_printf(dev,"Unsupported SFP+ module detected!");
5042                         printf(" Reload driver with supported module.\n");
5043                         adapter->sfp_probe = FALSE;
5044                         goto out;
5045                 } else
5046                         device_printf(dev,"SFP+ module detected!\n");
5047                 /* We now have supported optics */
5048                 adapter->sfp_probe = FALSE;
5049                 /* Set the optics type so system reports correctly */
5050                 ixgbe_setup_optics(adapter);
5051                 result = TRUE;
5052         }
5053 out:
5054         return (result);
5055 }
5056
5057 /*
5058 ** Tasklet handler for MSIX Link interrupts
5059 **  - do outside interrupt since it might sleep
5060 */
5061 static void
5062 ixgbe_handle_link(void *context, int pending)
5063 {
5064         struct adapter  *adapter = context;
5065
5066         ixgbe_check_link(&adapter->hw,
5067             &adapter->link_speed, &adapter->link_up, 0);
5068         ixgbe_update_link_status(adapter);
5069 }
5070
5071 /*
5072 ** Tasklet for handling SFP module interrupts
5073 */
5074 static void
5075 ixgbe_handle_mod(void *context, int pending)
5076 {
5077         struct adapter  *adapter = context;
5078         struct ixgbe_hw *hw = &adapter->hw;
5079         device_t        dev = adapter->dev;
5080         u32 err;
5081
5082         err = hw->phy.ops.identify_sfp(hw);
5083         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5084                 device_printf(dev,
5085                     "Unsupported SFP+ module type was detected.\n");
5086                 return;
5087         }
5088         err = hw->mac.ops.setup_sfp(hw);
5089         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5090                 device_printf(dev,
5091                     "Setup failure - unsupported SFP+ module type.\n");
5092                 return;
5093         }
5094         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5095         return;
5096 }
5097
5098
5099 /*
5100 ** Tasklet for handling MSF (multispeed fiber) interrupts
5101 */
5102 static void
5103 ixgbe_handle_msf(void *context, int pending)
5104 {
5105         struct adapter  *adapter = context;
5106         struct ixgbe_hw *hw = &adapter->hw;
5107         u32 autoneg;
5108         bool negotiate;
5109
5110         autoneg = hw->phy.autoneg_advertised;
5111         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5112                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5113         if (hw->mac.ops.setup_link)
5114                 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5115         return;
5116 }
5117
5118 #ifdef IXGBE_FDIR
5119 /*
5120 ** Tasklet for reinitializing the Flow Director filter table
5121 */
5122 static void
5123 ixgbe_reinit_fdir(void *context, int pending)
5124 {
5125         struct adapter  *adapter = context;
5126         struct ifnet   *ifp = adapter->ifp;
5127
5128         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5129                 return;
5130         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5131         adapter->fdir_reinit = 0;
5132         /* re-enable flow director interrupts */
5133         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5134         /* Restart the interface */
5135         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5136         return;
5137 }
5138 #endif
5139
5140 /**********************************************************************
5141  *
5142  *  Update the board statistics counters.
5143  *
5144  **********************************************************************/
5145 static void
5146 ixgbe_update_stats_counters(struct adapter *adapter)
5147 {
5148         struct ifnet   *ifp = adapter->ifp;
5149         struct ixgbe_hw *hw = &adapter->hw;
5150         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5151         u64  total_missed_rx = 0;
5152
5153         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5154         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5155         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5156         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5157
5158         /*
5159         ** Note: these are for the 8 possible traffic classes,
5160         **       which are unused in the current implementation,
5161         **       so only class 0 should show real data.
5162         */
5163         for (int i = 0; i < 8; i++) {
5164                 u32 mp;
5165                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5166                 /* missed_rx tallies misses for the gprc workaround */
5167                 missed_rx += mp;
5168                 /* global total per queue */
5169                 adapter->stats.mpc[i] += mp;
5170                 /* Running comprehensive total for stats display */
5171                 total_missed_rx += adapter->stats.mpc[i];
5172                 if (hw->mac.type == ixgbe_mac_82598EB) {
5173                         adapter->stats.rnbc[i] +=
5174                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5175                         adapter->stats.qbtc[i] +=
5176                             IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5177                         adapter->stats.qbrc[i] +=
5178                             IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5179                         adapter->stats.pxonrxc[i] +=
5180                             IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5181                 } else
5182                         adapter->stats.pxonrxc[i] +=
5183                             IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5184                 adapter->stats.pxontxc[i] +=
5185                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5186                 adapter->stats.pxofftxc[i] +=
5187                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5188                 adapter->stats.pxoffrxc[i] +=
5189                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5190                 adapter->stats.pxon2offc[i] +=
5191                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5192         }
5193         for (int i = 0; i < 16; i++) {
5194                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5195                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5196                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5197         }
5198         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5199         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5200         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5201
5202         /* Hardware workaround: gprc incorrectly counts missed packets */
5203         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5204         adapter->stats.gprc -= missed_rx;
5205
5206         if (hw->mac.type != ixgbe_mac_82598EB) {
5207                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5208                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5209                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5210                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5211                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5212                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5213                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5214                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5215         } else {
5216                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5217                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5218                 /* 82598 only has a counter in the high register */
5219                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5220                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5221                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5222         }
5223
5224         /*
5225          * Workaround: mprc hardware is incorrectly counting
5226          * broadcasts, so for now we subtract those.
5227          */
5228         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5229         adapter->stats.bprc += bprc;
5230         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5231         if (hw->mac.type == ixgbe_mac_82598EB)
5232                 adapter->stats.mprc -= bprc;
5233
5234         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5235         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5236         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5237         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5238         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5239         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5240
5241         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5242         adapter->stats.lxontxc += lxon;
5243         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5244         adapter->stats.lxofftxc += lxoff;
5245         total = lxon + lxoff;
5246
5247         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5248         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5249         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5250         adapter->stats.gptc -= total;
5251         adapter->stats.mptc -= total;
5252         adapter->stats.ptc64 -= total;
5253         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5254
5255         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5256         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5257         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5258         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5259         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5260         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5261         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5262         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5263         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5264         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5265         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5266         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5267         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5268         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5269         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5270         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5271         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5272         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5273         /* Only read FCOE on 82599 */
5274         if (hw->mac.type != ixgbe_mac_82598EB) {
5275                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5276                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5277                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5278                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5279                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5280         }
5281
5282         /* Fill out the OS statistics structure */
5283         ifp->if_ipackets = adapter->stats.gprc;
5284         ifp->if_opackets = adapter->stats.gptc;
5285         ifp->if_ibytes = adapter->stats.gorc;
5286         ifp->if_obytes = adapter->stats.gotc;
5287         ifp->if_imcasts = adapter->stats.mprc;
5288         ifp->if_omcasts = adapter->stats.mptc;
5289         ifp->if_collisions = 0;
5290
5291         /* Rx Errors */
5292         ifp->if_iqdrops = total_missed_rx;
5293         ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
5294 }
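
/*
 * Editor's sketch (illustrative): several of the octet counters read
 * above are wider than 32 bits and are split across an L/H register
 * pair, so the running software total is assembled as low + (high << 32).
 */
#if 0
static u64
example_counter_pair(u32 lo, u32 hi)
{
	return ((u64)lo + ((u64)hi << 32));
}
#endif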
5295
5296 /** ixgbe_sysctl_tdh_handler - Handler function
5297  *  Retrieves the TDH value from the hardware
5298  */
5299 static int 
5300 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5301 {
5302         int error;
5303
5304         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5305         if (!txr) return 0;
5306
5307         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5308         error = sysctl_handle_int(oidp, &val, 0, req);
5309         if (error || !req->newptr)
5310                 return error;
5311         return 0;
5312 }
5313
5314 /** ixgbe_sysctl_tdt_handler - Handler function
5315  *  Retrieves the TDT value from the hardware
5316  */
5317 static int 
5318 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5319 {
5320         int error;
5321
5322         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5323         if (!txr) return 0;
5324
5325         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5326         error = sysctl_handle_int(oidp, &val, 0, req);
5327         if (error || !req->newptr)
5328                 return error;
5329         return 0;
5330 }
5331
5332 /** ixgbe_sysctl_rdh_handler - Handler function
5333  *  Retrieves the RDH value from the hardware
5334  */
5335 static int 
5336 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5337 {
5338         int error;
5339
5340         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5341         if (!rxr) return 0;
5342
5343         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5344         error = sysctl_handle_int(oidp, &val, 0, req);
5345         if (error || !req->newptr)
5346                 return error;
5347         return 0;
5348 }
5349
5350 /** ixgbe_sysctl_rdt_handler - Handler function
5351  *  Retrieves the RDT value from the hardware
5352  */
5353 static int 
5354 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5355 {
5356         int error;
5357
5358         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5359         if (!rxr) return 0;
5360
5361         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5362         error = sysctl_handle_int(oidp, &val, 0, req);
5363         if (error || !req->newptr)
5364                 return error;
5365         return 0;
5366 }
5367
5368 static int
5369 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5370 {
5371         int error;
5372         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5373         unsigned int reg, usec, rate;
5374
5375         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5376         usec = ((reg & 0x0FF8) >> 3);
5377         if (usec > 0)
5378                 rate = 500000 / usec;
5379         else
5380                 rate = 0;
5381         error = sysctl_handle_int(oidp, &rate, 0, req);
5382         if (error || !req->newptr)
5383                 return error;
5384         reg &= ~0xfff; /* default, no limitation */
5385         ixgbe_max_interrupt_rate = 0;
5386         if (rate > 0 && rate < 500000) {
5387                 if (rate < 1000)
5388                         rate = 1000;
5389                 ixgbe_max_interrupt_rate = rate;
5390                 reg |= ((4000000 / rate) & 0x0FF8);
5391         }
5392         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5393         return 0;
5394 }
5395
5396 /*
5397  * Add sysctl variables, one per statistic, to the system.
5398  */
5399 static void
5400 ixgbe_add_hw_stats(struct adapter *adapter)
5401 {
5402
5403         device_t dev = adapter->dev;
5404
5405         struct tx_ring *txr = adapter->tx_rings;
5406         struct rx_ring *rxr = adapter->rx_rings;
5407
5408         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5409         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5410         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5411         struct ixgbe_hw_stats *stats = &adapter->stats;
5412
5413         struct sysctl_oid *stat_node, *queue_node;
5414         struct sysctl_oid_list *stat_list, *queue_list;
5415
5416 #define QUEUE_NAME_LEN 32
5417         char namebuf[QUEUE_NAME_LEN];
5418
5419         /* Driver Statistics */
5420         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5421                         CTLFLAG_RD, &adapter->dropped_pkts,
5422                         "Driver dropped packets");
5423         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5424                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5425                         "m_defrag() failed");
5426         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5427                         CTLFLAG_RD, &adapter->watchdog_events,
5428                         "Watchdog timeouts");
5429         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5430                         CTLFLAG_RD, &adapter->link_irq,
5431                         "Link MSIX IRQ Handled");
5432
5433         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5434                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5435                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5436                                             CTLFLAG_RD, NULL, "Queue Name");
5437                 queue_list = SYSCTL_CHILDREN(queue_node);
5438
5439                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5440                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5441                                 sizeof(&adapter->queues[i]),
5442                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5443                                 "Interrupt Rate");
5444                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5445                                 CTLFLAG_RD, &(adapter->queues[i].irqs),
5446                                 "irqs on this queue");
5447                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5448                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5449                                 ixgbe_sysctl_tdh_handler, "IU",
5450                                 "Transmit Descriptor Head");
5451                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5452                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5453                                 ixgbe_sysctl_tdt_handler, "IU",
5454                                 "Transmit Descriptor Tail");
5455                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
5456                                 CTLFLAG_RD, &txr->tso_tx,
5457                                 "TSO");
5458                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
5459                                 CTLFLAG_RD, &txr->no_tx_dma_setup,
5460                                 "Driver tx dma failure in xmit");
5461                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5462                                 CTLFLAG_RD, &txr->no_desc_avail,
5463                                 "Queue No Descriptor Available");
5464                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5465                                 CTLFLAG_RD, &txr->total_packets,
5466                                 "Queue Packets Transmitted");
5467         }
5468
5469         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5470                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5471                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5472                                             CTLFLAG_RD, NULL, "Queue Name");
5473                 queue_list = SYSCTL_CHILDREN(queue_node);
5474
5475                 struct lro_ctrl *lro = &rxr->lro;
5476
5482                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5483                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5484                                 ixgbe_sysctl_rdh_handler, "IU",
5485                                 "Receive Descriptor Head");
5486                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5487                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5488                                 ixgbe_sysctl_rdt_handler, "IU",
5489                                 "Receive Descriptor Tail");
5490                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5491                                 CTLFLAG_RD, &rxr->rx_packets,
5492                                 "Queue Packets Received");
5493                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5494                                 CTLFLAG_RD, &rxr->rx_bytes,
5495                                 "Queue Bytes Received");
5496                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
5497                                 CTLFLAG_RD, &rxr->rx_copies,
5498                                 "Copied RX Frames");
5499                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5500                                 CTLFLAG_RD, &lro->lro_queued, 0,
5501                                 "LRO Queued");
5502                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5503                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5504                                 "LRO Flushed");
5505         }
5506
5507         /* MAC stats get their own sub node */
5508
5509         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5510                                     CTLFLAG_RD, NULL, "MAC Statistics");
5511         stat_list = SYSCTL_CHILDREN(stat_node);
5512
5513         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5514                         CTLFLAG_RD, &stats->crcerrs,
5515                         "CRC Errors");
5516         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5517                         CTLFLAG_RD, &stats->illerrc,
5518                         "Illegal Byte Errors");
5519         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5520                         CTLFLAG_RD, &stats->errbc,
5521                         "Byte Errors");
5522         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5523                         CTLFLAG_RD, &stats->mspdc,
5524                         "MAC Short Packets Discarded");
5525         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5526                         CTLFLAG_RD, &stats->mlfc,
5527                         "MAC Local Faults");
5528         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5529                         CTLFLAG_RD, &stats->mrfc,
5530                         "MAC Remote Faults");
5531         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5532                         CTLFLAG_RD, &stats->rlec,
5533                         "Receive Length Errors");
5534
5535         /* Flow Control stats */
5536         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5537                         CTLFLAG_RD, &stats->lxontxc,
5538                         "Link XON Transmitted");
5539         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5540                         CTLFLAG_RD, &stats->lxonrxc,
5541                         "Link XON Received");
5542         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5543                         CTLFLAG_RD, &stats->lxofftxc,
5544                         "Link XOFF Transmitted");
5545         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5546                         CTLFLAG_RD, &stats->lxoffrxc,
5547                         "Link XOFF Received");
5548
5549         /* Packet Reception Stats */
5550         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5551                         CTLFLAG_RD, &stats->tor, 
5552                         "Total Octets Received"); 
5553         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5554                         CTLFLAG_RD, &stats->gorc, 
5555                         "Good Octets Received"); 
5556         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5557                         CTLFLAG_RD, &stats->tpr,
5558                         "Total Packets Received");
5559         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5560                         CTLFLAG_RD, &stats->gprc,
5561                         "Good Packets Received");
5562         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5563                         CTLFLAG_RD, &stats->mprc,
5564                         "Multicast Packets Received");
5565         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5566                         CTLFLAG_RD, &stats->bprc,
5567                         "Broadcast Packets Received");
5568         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5569                         CTLFLAG_RD, &stats->prc64,
5570                         "64 byte frames received");
5571         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5572                         CTLFLAG_RD, &stats->prc127,
5573                         "65-127 byte frames received");
5574         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5575                         CTLFLAG_RD, &stats->prc255,
5576                         "128-255 byte frames received");
5577         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5578                         CTLFLAG_RD, &stats->prc511,
5579                         "256-511 byte frames received");
5580         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5581                         CTLFLAG_RD, &stats->prc1023,
5582                         "512-1023 byte frames received");
5583         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5584                         CTLFLAG_RD, &stats->prc1522,
5585                         "1024-1522 byte frames received");
5586         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5587                         CTLFLAG_RD, &stats->ruc,
5588                         "Receive Undersized");
5589         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5590                         CTLFLAG_RD, &stats->rfc,
5591                         "Fragmented Packets Received");
5592         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5593                         CTLFLAG_RD, &stats->roc,
5594                         "Oversized Packets Received");
5595         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5596                         CTLFLAG_RD, &stats->rjc,
5597                         "Received Jabber");
5598         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5599                         CTLFLAG_RD, &stats->mngprc,
5600                         "Management Packets Received");
5601         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5602                         CTLFLAG_RD, &stats->mngpdc,
5603                         "Management Packets Dropped");
5604         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5605                         CTLFLAG_RD, &stats->xec,
5606                         "Checksum Errors");
5607
5608         /* Packet Transmission Stats */
5609         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5610                         CTLFLAG_RD, &stats->gotc, 
5611                         "Good Octets Transmitted"); 
5612         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5613                         CTLFLAG_RD, &stats->tpt,
5614                         "Total Packets Transmitted");
5615         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5616                         CTLFLAG_RD, &stats->gptc,
5617                         "Good Packets Transmitted");
5618         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5619                         CTLFLAG_RD, &stats->bptc,
5620                         "Broadcast Packets Transmitted");
5621         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5622                         CTLFLAG_RD, &stats->mptc,
5623                         "Multicast Packets Transmitted");
5624         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5625                         CTLFLAG_RD, &stats->mngptc,
5626                         "Management Packets Transmitted");
5627         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5628                         CTLFLAG_RD, &stats->ptc64,
5629                         "64 byte frames transmitted");
5630         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5631                         CTLFLAG_RD, &stats->ptc127,
5632                         "65-127 byte frames transmitted");
5633         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5634                         CTLFLAG_RD, &stats->ptc255,
5635                         "128-255 byte frames transmitted");
5636         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5637                         CTLFLAG_RD, &stats->ptc511,
5638                         "256-511 byte frames transmitted");
5639         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5640                         CTLFLAG_RD, &stats->ptc1023,
5641                         "512-1023 byte frames transmitted");
5642         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5643                         CTLFLAG_RD, &stats->ptc1522,
5644                         "1024-1522 byte frames transmitted");
5645 }
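/*
** Illustrative only, not part of the driver: the nodes registered
** above hang off the device's sysctl tree, conventionally
** dev.ix.<unit> for this driver.  Assuming unit 0, the counters
** could be inspected from userland with, e.g.:
**
**   # sysctl dev.ix.0.queue0.rx_packets
**   # sysctl dev.ix.0.mac_stats.crc_errs
**   # sysctl dev.ix.0.mac_stats          (dump the whole MAC subtree)
**
** The "ix" prefix and the unit number are assumptions for the example.
*/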
5646
5647 /*
5648 ** Set flow control using sysctl:
5649 ** Flow control values:
5650 **      0 - off
5651 **      1 - rx pause
5652 **      2 - tx pause
5653 **      3 - full
5654 */
5655 static int
5656 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5657 {
5658         int error, last;
5659         struct adapter *adapter = (struct adapter *) arg1;
5660
5661         last = adapter->fc;
5662         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5663         if ((error) || (req->newptr == NULL))
5664                 return (error);
5665
5666         /* Don't bother if it's not changed */
5667         if (adapter->fc == last)
5668                 return (0);
5669
5670         switch (adapter->fc) {
5671                 case ixgbe_fc_rx_pause:
5672                 case ixgbe_fc_tx_pause:
5673                 case ixgbe_fc_full:
5674                         adapter->hw.fc.requested_mode = adapter->fc;
5675                         if (adapter->num_queues > 1)
5676                                 ixgbe_disable_rx_drop(adapter);
5677                         break;
5678                 case ixgbe_fc_none:
5679                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5680                         if (adapter->num_queues > 1)
5681                                 ixgbe_enable_rx_drop(adapter);
5682                         break;
5683                 default:
5684                         adapter->fc = last;
5685                         return (EINVAL);
5686         }
5687         /* Don't autoneg if forcing a value */
5688         adapter->hw.fc.disable_fc_autoneg = TRUE;
5689         ixgbe_fc_enable(&adapter->hw);
5690         return (error);
5691 }
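/*
** Usage sketch (illustrative; this handler is attached to a read/write
** integer sysctl elsewhere in the driver, and the node name "fc" plus
** unit 0 are assumptions here).  Forcing full flow control:
**
**   # sysctl dev.ix.0.fc=3
**
** As the code above shows, writing any accepted value disables flow
** control autonegotiation, and on multiqueue adapters the per-queue
** RX drop policy is toggled to match (drop only when flow control
** is off).
*/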
5692
5693 /*
5694 ** Control link advertise speed:
5695 **      1 - advertise only 1G
5696 **      2 - advertise 100Mb
5697 **      3 - advertise normal (1G/10G)
5698 */
5699 static int
5700 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5701 {
5702         int                     error = 0;
5703         struct adapter          *adapter;
5704         device_t                dev;
5705         struct ixgbe_hw         *hw;
5706         ixgbe_link_speed        speed, last;
5707
5708         adapter = (struct adapter *) arg1;
5709         dev = adapter->dev;
5710         hw = &adapter->hw;
5711         last = adapter->advertise;
5712
5713         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5714         if ((error) || (req->newptr == NULL))
5715                 return (error);
5716
5717         if (adapter->advertise == last) /* no change */
5718                 return (0);
5719
5720         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5721             (hw->phy.multispeed_fiber)))
5722                 return (EINVAL);
5723
5724         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5725                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5726                 return (EINVAL);
5727         }
5728
5729         if (adapter->advertise == 1)
5730                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5731         else if (adapter->advertise == 2)
5732                 speed = IXGBE_LINK_SPEED_100_FULL;
5733         else if (adapter->advertise == 3)
5734                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5735                         IXGBE_LINK_SPEED_10GB_FULL;
5736         else {  /* bogus value */
5737                 adapter->advertise = last;
5738                 return (EINVAL);
5739         }
5740
5741         hw->mac.autotry_restart = TRUE;
5742         hw->mac.ops.setup_link(hw, speed, TRUE);
5743
5744         return (error);
5745 }
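/*
** Usage sketch (illustrative; the node name "advertise_speed" and
** unit 0 are assumptions).  Restricting an X540 copper port to
** advertising 100Mb full duplex:
**
**   # sysctl dev.ix.0.advertise_speed=2
**
** Per the checks above, this returns EINVAL on media that is neither
** copper nor multispeed fiber, and the value 2 is accepted on X540
** hardware only.
*/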
5746
5747 /*
5748 ** Thermal Shutdown Trigger
5749 **   - cause a Thermal Overtemp IRQ
5750 **   - this now requires the firmware to enable it
5751 */
5752 static int
5753 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5754 {
5755         int             error, fire = 0;
5756         struct adapter  *adapter = (struct adapter *) arg1;
5757         struct ixgbe_hw *hw = &adapter->hw;
5758
5760         if (hw->mac.type != ixgbe_mac_X540)
5761                 return (0);
5762
5763         error = sysctl_handle_int(oidp, &fire, 0, req);
5764         if ((error) || (req->newptr == NULL))
5765                 return (error);
5766
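        /*
         * EICS is the "interrupt cause set" register: writing a one to
         * the TS bit raises the overtemp interrupt just as if the
         * thermal sensor itself had tripped.
         */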
5767         if (fire) {
5768                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5769                 reg |= IXGBE_EICR_TS;
5770                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5771         }
5772
5773         return (0);
5774 }
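/*
** Illustrative trigger (the sysctl node name "ts" and unit 0 are
** assumptions for this version).  Any nonzero write fires the test
** interrupt on X540 hardware; on other MACs the handler returns
** without touching the hardware:
**
**   # sysctl dev.ix.0.ts=1
*/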
5775
5776 /*
5777 ** Enable the hardware to drop packets when the buffer is
5778 ** full. This is useful with multiqueue, so that no single
5779 ** full queue stalls the entire RX engine. We only enable
5780 ** this when multiqueue is in use AND when flow control is
5781 ** disabled.
5782 */
5783 static void
5784 ixgbe_enable_rx_drop(struct adapter *adapter)
5785 {
5786         struct ixgbe_hw *hw = &adapter->hw;
5787
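        /*
         * SRRCTL is the per-queue split receive control register;
         * DROP_EN lets the hardware silently drop packets destined
         * for a queue whose descriptor ring has no free buffers.
         */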
5788         for (int i = 0; i < adapter->num_queues; i++) {
5789                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5790                 srrctl |= IXGBE_SRRCTL_DROP_EN;
5791                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5792         }
5793 }
5794
5795 static void
5796 ixgbe_disable_rx_drop(struct adapter *adapter)
5797 {
5798         struct ixgbe_hw *hw = &adapter->hw;
5799
5800         for (int i = 0; i < adapter->num_queues; i++) {
5801                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5802                 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
5803                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5804         }
5805 }
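/*
** The resulting policy, as wired up in ixgbe_set_flowcntl() above for
** multiqueue adapters:
**
**   flow control off (ixgbe_fc_none)  -> SRRCTL.DROP_EN set
**   flow control rx/tx/full           -> SRRCTL.DROP_EN cleared
*/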