1 /******************************************************************************
2
3   Copyright (c) 2001-2013, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #include "opt_inet.h"
37 #include "opt_inet6.h"
38 #include "ixgbe.h"
39
40 /*********************************************************************
41  *  Set this to one to display debug statistics
42  *********************************************************************/
43 int             ixgbe_display_debug_stats = 0;
44
45 /*********************************************************************
46  *  Driver version
47  *********************************************************************/
48 char ixgbe_driver_version[] = "2.5.15";
49
50 /*********************************************************************
51  *  PCI Device ID Table
52  *
53  *  Used by probe to select devices to load on
54  *  Last field stores an index into ixgbe_strings
55  *  Last entry must be all 0s
56  *
57  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
58  *********************************************************************/
59
60 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
61 {
62         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
63         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
64         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
65         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
66         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
67         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
68         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
69         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
70         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
71         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
72         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
73         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
74         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
75         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
76         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
77         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
78         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
79         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
80         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
81         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
82         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
83         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
84         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
85         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
86         /* required last entry */
87         {0, 0, 0, 0, 0}
88 };
89
90 /*********************************************************************
91  *  Table of branding strings
92  *********************************************************************/
93
94 static char    *ixgbe_strings[] = {
95         "Intel(R) PRO/10GbE PCI-Express Network Driver"
96 };
97
98 /*********************************************************************
99  *  Function prototypes
100  *********************************************************************/
101 static int      ixgbe_probe(device_t);
102 static int      ixgbe_attach(device_t);
103 static int      ixgbe_detach(device_t);
104 static int      ixgbe_shutdown(device_t);
105 #ifdef IXGBE_LEGACY_TX
106 static void     ixgbe_start(struct ifnet *);
107 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
108 #else /* ! IXGBE_LEGACY_TX */
109 static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
110 static int      ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
111 static void     ixgbe_qflush(struct ifnet *);
112 static void     ixgbe_deferred_mq_start(void *, int);
113 #endif /* IXGBE_LEGACY_TX */
114 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
115 static void     ixgbe_init(void *);
116 static void     ixgbe_init_locked(struct adapter *);
117 static void     ixgbe_stop(void *);
118 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
119 static int      ixgbe_media_change(struct ifnet *);
120 static void     ixgbe_identify_hardware(struct adapter *);
121 static int      ixgbe_allocate_pci_resources(struct adapter *);
122 static void     ixgbe_get_slot_info(struct ixgbe_hw *);
123 static int      ixgbe_allocate_msix(struct adapter *);
124 static int      ixgbe_allocate_legacy(struct adapter *);
125 static int      ixgbe_allocate_queues(struct adapter *);
126 static int      ixgbe_setup_msix(struct adapter *);
127 static void     ixgbe_free_pci_resources(struct adapter *);
128 static void     ixgbe_local_timer(void *);
129 static int      ixgbe_setup_interface(device_t, struct adapter *);
130 static void     ixgbe_config_link(struct adapter *);
131
132 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
133 static int      ixgbe_setup_transmit_structures(struct adapter *);
134 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
135 static void     ixgbe_initialize_transmit_units(struct adapter *);
136 static void     ixgbe_free_transmit_structures(struct adapter *);
137 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
138
139 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
140 static int      ixgbe_setup_receive_structures(struct adapter *);
141 static int      ixgbe_setup_receive_ring(struct rx_ring *);
142 static void     ixgbe_initialize_receive_units(struct adapter *);
143 static void     ixgbe_free_receive_structures(struct adapter *);
144 static void     ixgbe_free_receive_buffers(struct rx_ring *);
145 static void     ixgbe_setup_hw_rsc(struct rx_ring *);
146
147 static void     ixgbe_enable_intr(struct adapter *);
148 static void     ixgbe_disable_intr(struct adapter *);
149 static void     ixgbe_update_stats_counters(struct adapter *);
150 static void     ixgbe_txeof(struct tx_ring *);
151 static bool     ixgbe_rxeof(struct ix_queue *);
152 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
153 static void     ixgbe_set_promisc(struct adapter *);
154 static void     ixgbe_set_multi(struct adapter *);
155 static void     ixgbe_update_link_status(struct adapter *);
156 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
157 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
158 static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
159 static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
160 static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
161 static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
162                     struct ixgbe_dma_alloc *, int);
163 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
164 static int      ixgbe_tx_ctx_setup(struct tx_ring *,
165                     struct mbuf *, u32 *, u32 *);
166 static int      ixgbe_tso_setup(struct tx_ring *,
167                     struct mbuf *, u32 *, u32 *);
168 static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
169 static void     ixgbe_configure_ivars(struct adapter *);
170 static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
171
172 static void     ixgbe_setup_vlan_hw_support(struct adapter *);
173 static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
174 static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);
175
176 static void     ixgbe_add_hw_stats(struct adapter *adapter);
177
178 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
179 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
180                     struct mbuf *, u32);
181
182 static void     ixgbe_enable_rx_drop(struct adapter *);
183 static void     ixgbe_disable_rx_drop(struct adapter *);
184
185 /* Support for pluggable optic modules */
186 static bool     ixgbe_sfp_probe(struct adapter *);
187 static void     ixgbe_setup_optics(struct adapter *);
188
189 /* Legacy (single vector) interrupt handler */
190 static void     ixgbe_legacy_irq(void *);
191
192 /* The MSI/X Interrupt handlers */
193 static void     ixgbe_msix_que(void *);
194 static void     ixgbe_msix_link(void *);
195
196 /* Deferred interrupt tasklets */
197 static void     ixgbe_handle_que(void *, int);
198 static void     ixgbe_handle_link(void *, int);
199 static void     ixgbe_handle_msf(void *, int);
200 static void     ixgbe_handle_mod(void *, int);
201
202 #ifdef IXGBE_FDIR
203 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
204 static void     ixgbe_reinit_fdir(void *, int);
205 #endif
206
207 /* Missing shared code prototype */
208 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
209
210 /*********************************************************************
211  *  FreeBSD Device Interface Entry Points
212  *********************************************************************/
213
214 static device_method_t ixgbe_methods[] = {
215         /* Device interface */
216         DEVMETHOD(device_probe, ixgbe_probe),
217         DEVMETHOD(device_attach, ixgbe_attach),
218         DEVMETHOD(device_detach, ixgbe_detach),
219         DEVMETHOD(device_shutdown, ixgbe_shutdown),
220         DEVMETHOD_END
221 };
222
223 static driver_t ixgbe_driver = {
224         "ix", ixgbe_methods, sizeof(struct adapter),
225 };
226
227 devclass_t ixgbe_devclass;
228 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
229
230 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
231 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
232
233 /*
234 ** TUNEABLE PARAMETERS:
235 */
236
237 /*
238 ** AIM: Adaptive Interrupt Moderation,
239 ** which means that the interrupt rate
240 ** is varied over time based on the
241 ** traffic seen on that interrupt vector.
242 */
243 static int ixgbe_enable_aim = TRUE;
244 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
245
246 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
247 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
248
249 /* How many packets rxeof tries to clean at a time */
250 static int ixgbe_rx_process_limit = 256;
251 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
252
253 /* How many packets txeof tries to clean at a time */
254 static int ixgbe_tx_process_limit = 256;
255 TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
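The TUNABLE_INT() hooks above are read from the kernel environment when the module is loaded, so these knobs are normally set from /boot/loader.conf before boot rather than at runtime. A minimal sketch follows; the values are illustrative only, not recommendations:

/*
 * Example /boot/loader.conf entries (illustrative values only):
 *
 *   hw.ixgbe.enable_aim="0"
 *   hw.ixgbe.max_interrupt_rate="16000"
 *   hw.ixgbe.rx_process_limit="512"
 *   hw.ixgbe.tx_process_limit="512"
 */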
256
257 /*
258 ** Smart speed setting, default to on.
259 ** This only works as a compile-time option
260 ** right now, since it is applied during attach;
261 ** set this to 'ixgbe_smart_speed_off' to
262 ** disable.
263 */
264 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
265
266 /*
267  * MSIX should be the default for best performance,
268  * but this allows it to be forced off for testing.
269  */
270 static int ixgbe_enable_msix = 1;
271 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
272
273 /*
274  * Number of Queues: can be set to 0,
275  * in which case it autoconfigures based
276  * on the number of cpus with a max of 8.
277  * This can be overridden manually here.
278  */
279 static int ixgbe_num_queues = 0;
280 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
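The autoconfiguration described above happens later, during interrupt setup (ixgbe_setup_msix(), prototyped earlier but outside this excerpt). The snippet below is only a hypothetical sketch of that clamping behaviour, not the driver's actual code:

/* Hypothetical sketch of the queue autoconfiguration described above */
static int
example_pick_queue_count(int tunable, int ncpus)
{
	int queues = tunable;

	if (queues == 0)	/* 0 requests autoconfiguration */
		queues = ncpus;	/* one queue per CPU ... */
	if (queues > 8)
		queues = 8;	/* ... capped at the documented max of 8 */
	return (queues);
}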
281
282 /*
283 ** Number of TX descriptors per ring,
284 ** setting higher than RX as this seems
285 ** the better performing choice.
286 */
287 static int ixgbe_txd = PERFORM_TXD;
288 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
289
290 /* Number of RX descriptors per ring */
291 static int ixgbe_rxd = PERFORM_RXD;
292 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
293
294 /*
295 ** Enabling this will allow the use
296 ** of unsupported SFP+ modules; note that
297 ** in doing so you are on your own :)
298 */
299 static int allow_unsupported_sfp = FALSE;
300 TUNABLE_INT("hw.ixgbe.unsupported_sfp", &allow_unsupported_sfp);
301
302 /*
303 ** HW RSC control:
304 **  this feature only works with
305 **  IPv4, and only on 82599 and later.
306 **  It will also cause IP forwarding to
307 **  fail, and unlike LRO that cannot be
308 **  controlled by the stack. For all these
309 **  reasons it is best left off, and there
310 **  is no tuneable interface; the driver
311 **  must be recompiled with this set to
312 **  TRUE to enable it.
313 */
314 static bool ixgbe_rsc_enable = FALSE;
315
316 /* Keep a running tab on ports for sanity checking */
317 static int ixgbe_total_ports;
318
319 #ifdef IXGBE_FDIR
320 /*
321 ** For Flow Director: this is the
322 ** number of TX packets we sample
323 ** for the filter pool, this means
324 ** every 20th packet will be probed.
325 **
326 ** This feature can be disabled by 
327 ** setting this to 0.
328 */
329 static int atr_sample_rate = 20;
330 /* 
331 ** Flow Director actually 'steals'
332 ** part of the packet buffer as its
333 ** filter pool, this variable controls
334 ** how much it uses:
335 **  0 = 64K, 1 = 128K, 2 = 256K
336 */
337 static int fdir_pballoc = 1;
338 #endif
339
340 #ifdef DEV_NETMAP
341 /*
342  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
343  * be a reference on how to implement netmap support in a driver.
344  * Additional comments are in ixgbe_netmap.h .
345  *
346  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
347  * that extend the standard driver.
348  */
349 #include <dev/netmap/ixgbe_netmap.h>
350 #endif /* DEV_NETMAP */
351
352 /*********************************************************************
353  *  Device identification routine
354  *
355  *  ixgbe_probe determines whether the driver should be loaded on
356  *  this adapter, based on the PCI vendor/device id of the adapter.
357  *
358  *  return BUS_PROBE_DEFAULT on success, positive on failure
359  *********************************************************************/
360
361 static int
362 ixgbe_probe(device_t dev)
363 {
364         ixgbe_vendor_info_t *ent;
365
366         u16     pci_vendor_id = 0;
367         u16     pci_device_id = 0;
368         u16     pci_subvendor_id = 0;
369         u16     pci_subdevice_id = 0;
370         char    adapter_name[256];
371
372         INIT_DEBUGOUT("ixgbe_probe: begin");
373
374         pci_vendor_id = pci_get_vendor(dev);
375         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
376                 return (ENXIO);
377
378         pci_device_id = pci_get_device(dev);
379         pci_subvendor_id = pci_get_subvendor(dev);
380         pci_subdevice_id = pci_get_subdevice(dev);
381
382         ent = ixgbe_vendor_info_array;
383         while (ent->vendor_id != 0) {
384                 if ((pci_vendor_id == ent->vendor_id) &&
385                     (pci_device_id == ent->device_id) &&
386
387                     ((pci_subvendor_id == ent->subvendor_id) ||
388                      (ent->subvendor_id == 0)) &&
389
390                     ((pci_subdevice_id == ent->subdevice_id) ||
391                      (ent->subdevice_id == 0))) {
392                         sprintf(adapter_name, "%s, Version - %s",
393                                 ixgbe_strings[ent->index],
394                                 ixgbe_driver_version);
395                         device_set_desc_copy(dev, adapter_name);
396                         ++ixgbe_total_ports;
397                         return (BUS_PROBE_DEFAULT);
398                 }
399                 ent++;
400         }
401         return (ENXIO);
402 }
403
404 /*********************************************************************
405  *  Device initialization routine
406  *
407  *  The attach entry point is called when the driver is being loaded.
408  *  This routine identifies the type of hardware, allocates all resources
409  *  and initializes the hardware.
410  *
411  *  return 0 on success, positive on failure
412  *********************************************************************/
413
414 static int
415 ixgbe_attach(device_t dev)
416 {
417         struct adapter *adapter;
418         struct ixgbe_hw *hw;
419         int             error = 0;
420         u16             csum;
421         u32             ctrl_ext;
422
423         INIT_DEBUGOUT("ixgbe_attach: begin");
424
425         /* Allocate, clear, and link in our adapter structure */
426         adapter = device_get_softc(dev);
427         adapter->dev = adapter->osdep.dev = dev;
428         hw = &adapter->hw;
429
430         /* Core Lock Init*/
431         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
432
433         /* SYSCTL APIs */
434
435         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
436                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
437                         OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
438                         adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
439
440         SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
441                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
442                         OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
443                         &ixgbe_enable_aim, 1, "Interrupt Moderation");
444
445         /*
446         ** Allow a kind of speed control by forcing the autoneg
447         ** advertised speed list to only a certain value, this
448         ** supports 1G on 82599 devices, and 100Mb on x540.
449         */
450         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
451                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
452                         OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
453                         adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
454
455         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
456                         SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
457                         OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
458                         0, ixgbe_set_thermal_test, "I", "Thermal Test");
459
460         /* Set up the timer callout */
461         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
462
463         /* Determine hardware revision */
464         ixgbe_identify_hardware(adapter);
465
466         /* Do base PCI setup - map BAR0 */
467         if (ixgbe_allocate_pci_resources(adapter)) {
468                 device_printf(dev, "Allocation of PCI resources failed\n");
469                 error = ENXIO;
470                 goto err_out;
471         }
472
473         /* Do descriptor calc and sanity checks */
474         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
475             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
476                 device_printf(dev, "TXD config issue, using default!\n");
477                 adapter->num_tx_desc = DEFAULT_TXD;
478         } else
479                 adapter->num_tx_desc = ixgbe_txd;
480
481         /*
482         ** With many RX rings it is easy to exceed the
483         ** system mbuf allocation. Tuning nmbclusters
484         ** can alleviate this.
485         */
486         if (nmbclusters > 0) {
487                 int s;
488                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
489                 if (s > nmbclusters) {
490                         device_printf(dev, "RX Descriptors exceed "
491                             "system mbuf max, using default instead!\n");
492                         ixgbe_rxd = DEFAULT_RXD;
493                 }
494         }
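        /*
         * Worked example of the check above: with 2048 RX descriptors
         * per ring, 8 queues and 2 ports (example values only),
         * s = 2048 * 8 * 2 = 32768 clusters, which must fit within
         * kern.ipc.nmbclusters or ixgbe_rxd falls back to DEFAULT_RXD.
         */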
495
496         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
497             ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
498                 device_printf(dev, "RXD config issue, using default!\n");
499                 adapter->num_rx_desc = DEFAULT_RXD;
500         } else
501                 adapter->num_rx_desc = ixgbe_rxd;
502
503         /* Allocate our TX/RX Queues */
504         if (ixgbe_allocate_queues(adapter)) {
505                 error = ENOMEM;
506                 goto err_out;
507         }
508
509         /* Allocate multicast array memory. */
510         adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
511             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
512         if (adapter->mta == NULL) {
513                 device_printf(dev, "Can not allocate multicast setup array\n");
514                 error = ENOMEM;
515                 goto err_late;
516         }
517
518         /* Initialize the shared code */
519         hw->allow_unsupported_sfp = allow_unsupported_sfp;
520         error = ixgbe_init_shared_code(hw);
521         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
522                 /*
523                 ** No optics in this port, set up
524                 ** so the timer routine will probe 
525                 ** for later insertion.
526                 */
527                 adapter->sfp_probe = TRUE;
528                 error = 0;
529         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
530                 device_printf(dev,"Unsupported SFP+ module detected!\n");
531                 error = EIO;
532                 goto err_late;
533         } else if (error) {
534                 device_printf(dev,"Unable to initialize the shared code\n");
535                 error = EIO;
536                 goto err_late;
537         }
538
539         /* Make sure we have a good EEPROM before we read from it */
540         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
541                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
542                 error = EIO;
543                 goto err_late;
544         }
545
546         error = ixgbe_init_hw(hw);
547         switch (error) {
548         case IXGBE_ERR_EEPROM_VERSION:
549                 device_printf(dev, "This device is a pre-production adapter/"
550                     "LOM.  Please be aware there may be issues associated "
551                     "with your hardware.\n If you are experiencing problems "
552                     "please contact your Intel or hardware representative "
553                     "who provided you with this hardware.\n");
554                 break;
555         case IXGBE_ERR_SFP_NOT_SUPPORTED:
556                 device_printf(dev,"Unsupported SFP+ Module\n");
557                 error = EIO;
558                 goto err_late;
559         case IXGBE_ERR_SFP_NOT_PRESENT:
560                 device_printf(dev,"No SFP+ Module found\n");
561                 /* falls thru */
562         default:
563                 break;
564         }
565
566         /* Detect and set physical type */
567         ixgbe_setup_optics(adapter);
568
569         if ((adapter->msix > 1) && (ixgbe_enable_msix))
570                 error = ixgbe_allocate_msix(adapter); 
571         else
572                 error = ixgbe_allocate_legacy(adapter); 
573         if (error) 
574                 goto err_late;
575
576         /* Setup OS specific network interface */
577         if (ixgbe_setup_interface(dev, adapter) != 0)
578                 goto err_late;
579
580         /* Initialize statistics */
581         ixgbe_update_stats_counters(adapter);
582
583         /* Register for VLAN events */
584         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
585             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
586         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
587             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
588
589         /*
590         ** Check PCIE slot type/speed/width
591         */
592         ixgbe_get_slot_info(hw);
593
594         /* Set an initial default flow control value */
595         adapter->fc =  ixgbe_fc_full;
596
597         /* let hardware know driver is loaded */
598         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
599         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
600         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
601
602         ixgbe_add_hw_stats(adapter);
603
604 #ifdef DEV_NETMAP
605         ixgbe_netmap_attach(adapter);
606 #endif /* DEV_NETMAP */
607         INIT_DEBUGOUT("ixgbe_attach: end");
608         return (0);
609 err_late:
610         ixgbe_free_transmit_structures(adapter);
611         ixgbe_free_receive_structures(adapter);
612 err_out:
613         if (adapter->ifp != NULL)
614                 if_free(adapter->ifp);
615         ixgbe_free_pci_resources(adapter);
616         free(adapter->mta, M_DEVBUF);
617         return (error);
618
619 }
620
621 /*********************************************************************
622  *  Device removal routine
623  *
624  *  The detach entry point is called when the driver is being removed.
625  *  This routine stops the adapter and deallocates all the resources
626  *  that were allocated for driver operation.
627  *
628  *  return 0 on success, positive on failure
629  *********************************************************************/
630
631 static int
632 ixgbe_detach(device_t dev)
633 {
634         struct adapter *adapter = device_get_softc(dev);
635         struct ix_queue *que = adapter->queues;
636         struct tx_ring *txr = adapter->tx_rings;
637         u32     ctrl_ext;
638
639         INIT_DEBUGOUT("ixgbe_detach: begin");
640
641         /* Make sure VLANS are not using driver */
642         if (adapter->ifp->if_vlantrunk != NULL) {
643                 device_printf(dev,"Vlan in use, detach first\n");
644                 return (EBUSY);
645         }
646
647         IXGBE_CORE_LOCK(adapter);
648         ixgbe_stop(adapter);
649         IXGBE_CORE_UNLOCK(adapter);
650
651         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
652                 if (que->tq) {
653 #ifndef IXGBE_LEGACY_TX
654                         taskqueue_drain(que->tq, &txr->txq_task);
655 #endif
656                         taskqueue_drain(que->tq, &que->que_task);
657                         taskqueue_free(que->tq);
658                 }
659         }
660
661         /* Drain the Link queue */
662         if (adapter->tq) {
663                 taskqueue_drain(adapter->tq, &adapter->link_task);
664                 taskqueue_drain(adapter->tq, &adapter->mod_task);
665                 taskqueue_drain(adapter->tq, &adapter->msf_task);
666 #ifdef IXGBE_FDIR
667                 taskqueue_drain(adapter->tq, &adapter->fdir_task);
668 #endif
669                 taskqueue_free(adapter->tq);
670         }
671
672         /* let hardware know driver is unloading */
673         ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
674         ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
675         IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
676
677         /* Unregister VLAN events */
678         if (adapter->vlan_attach != NULL)
679                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
680         if (adapter->vlan_detach != NULL)
681                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
682
683         ether_ifdetach(adapter->ifp);
684         callout_drain(&adapter->timer);
685 #ifdef DEV_NETMAP
686         netmap_detach(adapter->ifp);
687 #endif /* DEV_NETMAP */
688         ixgbe_free_pci_resources(adapter);
689         bus_generic_detach(dev);
690         if_free(adapter->ifp);
691
692         ixgbe_free_transmit_structures(adapter);
693         ixgbe_free_receive_structures(adapter);
694         free(adapter->mta, M_DEVBUF);
695
696         IXGBE_CORE_LOCK_DESTROY(adapter);
697         return (0);
698 }
699
700 /*********************************************************************
701  *
702  *  Shutdown entry point
703  *
704  **********************************************************************/
705
706 static int
707 ixgbe_shutdown(device_t dev)
708 {
709         struct adapter *adapter = device_get_softc(dev);
710         IXGBE_CORE_LOCK(adapter);
711         ixgbe_stop(adapter);
712         IXGBE_CORE_UNLOCK(adapter);
713         return (0);
714 }
715
716
717 #ifdef IXGBE_LEGACY_TX
718 /*********************************************************************
719  *  Transmit entry point
720  *
721  *  ixgbe_start is called by the stack to initiate a transmit.
722  *  The driver will remain in this routine as long as there are
723  *  packets to transmit and transmit resources are available.
724  *  In case resources are not available, the stack is notified and
725  *  the packet is requeued.
726  **********************************************************************/
727
728 static void
729 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
730 {
731         struct mbuf    *m_head;
732         struct adapter *adapter = txr->adapter;
733
734         IXGBE_TX_LOCK_ASSERT(txr);
735
736         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
737                 return;
738         if (!adapter->link_active)
739                 return;
740
741         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
742                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
743                         break;
744
745                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
746                 if (m_head == NULL)
747                         break;
748
749                 if (ixgbe_xmit(txr, &m_head)) {
750                         if (m_head != NULL)
751                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
752                         break;
753                 }
754                 /* Send a copy of the frame to the BPF listener */
755                 ETHER_BPF_MTAP(ifp, m_head);
756
757                 /* Set watchdog on */
758                 txr->watchdog_time = ticks;
759                 txr->queue_status = IXGBE_QUEUE_WORKING;
760
761         }
762         return;
763 }
764
765 /*
766  * Legacy TX start - called by the stack, this
767  * always uses the first tx ring, and should
768  * not be used with multiqueue tx enabled.
769  */
770 static void
771 ixgbe_start(struct ifnet *ifp)
772 {
773         struct adapter *adapter = ifp->if_softc;
774         struct tx_ring  *txr = adapter->tx_rings;
775
776         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
777                 IXGBE_TX_LOCK(txr);
778                 ixgbe_start_locked(txr, ifp);
779                 IXGBE_TX_UNLOCK(txr);
780         }
781         return;
782 }
783
784 #else /* ! IXGBE_LEGACY_TX */
785
786 /*
787 ** Multiqueue Transmit driver
788 **
789 */
790 static int
791 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
792 {
793         struct adapter  *adapter = ifp->if_softc;
794         struct ix_queue *que;
795         struct tx_ring  *txr;
796         int             i, err = 0;
797
798         /* Which queue to use */
799         if ((m->m_flags & M_FLOWID) != 0)
800                 i = m->m_pkthdr.flowid % adapter->num_queues;
801         else
802                 i = curcpu % adapter->num_queues;
803
804         txr = &adapter->tx_rings[i];
805         que = &adapter->queues[i];
806
807         err = drbr_enqueue(ifp, txr->br, m);
808         if (err)
809                 return (err);
810         if (IXGBE_TX_TRYLOCK(txr)) {
811                 err = ixgbe_mq_start_locked(ifp, txr);
812                 IXGBE_TX_UNLOCK(txr);
813         } else
814                 taskqueue_enqueue(que->tq, &txr->txq_task);
815
816         return (err);
817 }
818
819 static int
820 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
821 {
822         struct adapter  *adapter = txr->adapter;
823         struct mbuf     *next;
824         int             enqueued = 0, err = 0;
825
826         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
827             adapter->link_active == 0)
828                 return (ENETDOWN);
829
830         /* Process the queue */
831 #if __FreeBSD_version < 901504
832         next = drbr_dequeue(ifp, txr->br);
833         while (next != NULL) {
834                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
835                         if (next != NULL)
836                                 err = drbr_enqueue(ifp, txr->br, next);
837 #else
838         while ((next = drbr_peek(ifp, txr->br)) != NULL) {
839                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
840                         if (next == NULL) {
841                                 drbr_advance(ifp, txr->br);
842                         } else {
843                                 drbr_putback(ifp, txr->br, next);
844                         }
845 #endif
846                         break;
847                 }
848 #if __FreeBSD_version >= 901504
849                 drbr_advance(ifp, txr->br);
850 #endif
851                 enqueued++;
852                 /* Send a copy of the frame to the BPF listener */
853                 ETHER_BPF_MTAP(ifp, next);
854                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
855                         break;
856 #if __FreeBSD_version < 901504
857                 next = drbr_dequeue(ifp, txr->br);
858 #endif
859         }
860
861         if (enqueued > 0) {
862                 /* Set watchdog on */
863                 txr->queue_status = IXGBE_QUEUE_WORKING;
864                 txr->watchdog_time = ticks;
865         }
866
867         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
868                 ixgbe_txeof(txr);
869
870         return (err);
871 }
872
873 /*
874  * Called from a taskqueue to drain queued transmit packets.
875  */
876 static void
877 ixgbe_deferred_mq_start(void *arg, int pending)
878 {
879         struct tx_ring *txr = arg;
880         struct adapter *adapter = txr->adapter;
881         struct ifnet *ifp = adapter->ifp;
882
883         IXGBE_TX_LOCK(txr);
884         if (!drbr_empty(ifp, txr->br))
885                 ixgbe_mq_start_locked(ifp, txr);
886         IXGBE_TX_UNLOCK(txr);
887 }
888
889 /*
890 ** Flush all ring buffers
891 */
892 static void
893 ixgbe_qflush(struct ifnet *ifp)
894 {
895         struct adapter  *adapter = ifp->if_softc;
896         struct tx_ring  *txr = adapter->tx_rings;
897         struct mbuf     *m;
898
899         for (int i = 0; i < adapter->num_queues; i++, txr++) {
900                 IXGBE_TX_LOCK(txr);
901                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
902                         m_freem(m);
903                 IXGBE_TX_UNLOCK(txr);
904         }
905         if_qflush(ifp);
906 }
907 #endif /* IXGBE_LEGACY_TX */
908
909 /*********************************************************************
910  *  Ioctl entry point
911  *
912  *  ixgbe_ioctl is called when the user wants to configure the
913  *  interface.
914  *
915  *  return 0 on success, positive on failure
916  **********************************************************************/
917
918 static int
919 ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
920 {
921         struct adapter  *adapter = ifp->if_softc;
922         struct ixgbe_hw *hw = &adapter->hw;
923         struct ifreq    *ifr = (struct ifreq *) data;
924 #if defined(INET) || defined(INET6)
925         struct ifaddr *ifa = (struct ifaddr *)data;
926         bool            avoid_reset = FALSE;
927 #endif
928         int             error = 0;
929
930         switch (command) {
931
932         case SIOCSIFADDR:
933 #ifdef INET
934                 if (ifa->ifa_addr->sa_family == AF_INET)
935                         avoid_reset = TRUE;
936 #endif
937 #ifdef INET6
938                 if (ifa->ifa_addr->sa_family == AF_INET6)
939                         avoid_reset = TRUE;
940 #endif
941 #if defined(INET) || defined(INET6)
942                 /*
943                 ** Calling init results in link renegotiation,
944                 ** so we avoid doing it when possible.
945                 */
946                 if (avoid_reset) {
947                         ifp->if_flags |= IFF_UP;
948                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
949                                 ixgbe_init(adapter);
950                         if (!(ifp->if_flags & IFF_NOARP))
951                                 arp_ifinit(ifp, ifa);
952                 } else
953                         error = ether_ioctl(ifp, command, data);
954 #endif
955                 break;
956         case SIOCSIFMTU:
957                 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
958                 if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
959                         error = EINVAL;
960                 } else {
961                         IXGBE_CORE_LOCK(adapter);
962                         ifp->if_mtu = ifr->ifr_mtu;
963                         adapter->max_frame_size =
964                                 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
965                         ixgbe_init_locked(adapter);
966                         IXGBE_CORE_UNLOCK(adapter);
967                 }
968                 break;
969         case SIOCSIFFLAGS:
970                 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
971                 IXGBE_CORE_LOCK(adapter);
972                 if (ifp->if_flags & IFF_UP) {
973                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
974                                 if ((ifp->if_flags ^ adapter->if_flags) &
975                                     (IFF_PROMISC | IFF_ALLMULTI)) {
976                                         ixgbe_set_promisc(adapter);
977                                 }
978                         } else
979                                 ixgbe_init_locked(adapter);
980                 } else
981                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
982                                 ixgbe_stop(adapter);
983                 adapter->if_flags = ifp->if_flags;
984                 IXGBE_CORE_UNLOCK(adapter);
985                 break;
986         case SIOCADDMULTI:
987         case SIOCDELMULTI:
988                 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
989                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
990                         IXGBE_CORE_LOCK(adapter);
991                         ixgbe_disable_intr(adapter);
992                         ixgbe_set_multi(adapter);
993                         ixgbe_enable_intr(adapter);
994                         IXGBE_CORE_UNLOCK(adapter);
995                 }
996                 break;
997         case SIOCSIFMEDIA:
998         case SIOCGIFMEDIA:
999                 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1000                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1001                 break;
1002         case SIOCSIFCAP:
1003         {
1004                 int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1005                 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1006                 if (mask & IFCAP_HWCSUM)
1007                         ifp->if_capenable ^= IFCAP_HWCSUM;
1008                 if (mask & IFCAP_TSO4)
1009                         ifp->if_capenable ^= IFCAP_TSO4;
1010                 if (mask & IFCAP_TSO6)
1011                         ifp->if_capenable ^= IFCAP_TSO6;
1012                 if (mask & IFCAP_LRO)
1013                         ifp->if_capenable ^= IFCAP_LRO;
1014                 if (mask & IFCAP_VLAN_HWTAGGING)
1015                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1016                 if (mask & IFCAP_VLAN_HWFILTER)
1017                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1018                 if (mask & IFCAP_VLAN_HWTSO)
1019                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1020                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1021                         IXGBE_CORE_LOCK(adapter);
1022                         ixgbe_init_locked(adapter);
1023                         IXGBE_CORE_UNLOCK(adapter);
1024                 }
1025                 VLAN_CAPABILITIES(ifp);
1026                 break;
1027         }
1028         case SIOCGI2C:
1029         {
1030                 struct ixgbe_i2c_req    i2c;
1031                 IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
1032                 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
1033                 if (error)
1034                         break;
1035                 if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
1036                         error = EINVAL;
1037                         break;
1038                 }
1039                 hw->phy.ops.read_i2c_byte(hw, i2c.offset,
1040                     i2c.dev_addr, i2c.data);
1041                 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
1042                 break;
1043         }
1044         default:
1045                 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1046                 error = ether_ioctl(ifp, command, data);
1047                 break;
1048         }
1049
1050         return (error);
1051 }
1052
1053 /*********************************************************************
1054  *  Init entry point
1055  *
1056  *  This routine is used in two ways. It is used by the stack as
1057  *  the init entry point in the network interface structure. It is also used
1058  *  by the driver as a hw/sw initialization routine to get to a
1059  *  consistent state.
1060  *
1061  *  return 0 on success, positive on failure
1062  **********************************************************************/
1063 #define IXGBE_MHADD_MFS_SHIFT 16
1064
1065 static void
1066 ixgbe_init_locked(struct adapter *adapter)
1067 {
1068         struct ifnet   *ifp = adapter->ifp;
1069         device_t        dev = adapter->dev;
1070         struct ixgbe_hw *hw = &adapter->hw;
1071         u32             k, txdctl, mhadd, gpie;
1072         u32             rxdctl, rxctrl;
1073
1074         mtx_assert(&adapter->core_mtx, MA_OWNED);
1075         INIT_DEBUGOUT("ixgbe_init_locked: begin");
1076         hw->adapter_stopped = FALSE;
1077         ixgbe_stop_adapter(hw);
1078         callout_stop(&adapter->timer);
1079
1080         /* reprogram the RAR[0] in case user changed it. */
1081         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1082
1083         /* Get the latest mac address, User can use a LAA */
1084         bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
1085               IXGBE_ETH_LENGTH_OF_ADDRESS);
1086         ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1087         hw->addr_ctrl.rar_used_count = 1;
1088
1089         /* Set the various hardware offload abilities */
1090         ifp->if_hwassist = 0;
1091         if (ifp->if_capenable & IFCAP_TSO)
1092                 ifp->if_hwassist |= CSUM_TSO;
1093         if (ifp->if_capenable & IFCAP_TXCSUM) {
1094                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1095 #if __FreeBSD_version >= 800000
1096                 if (hw->mac.type != ixgbe_mac_82598EB)
1097                         ifp->if_hwassist |= CSUM_SCTP;
1098 #endif
1099         }
1100
1101         /* Prepare transmit descriptors and buffers */
1102         if (ixgbe_setup_transmit_structures(adapter)) {
1103                 device_printf(dev,"Could not setup transmit structures\n");
1104                 ixgbe_stop(adapter);
1105                 return;
1106         }
1107
1108         ixgbe_init_hw(hw);
1109         ixgbe_initialize_transmit_units(adapter);
1110
1111         /* Setup Multicast table */
1112         ixgbe_set_multi(adapter);
1113
1114         /*
1115         ** Determine the correct mbuf pool
1116         ** for doing jumbo frames
1117         */
1118         if (adapter->max_frame_size <= 2048)
1119                 adapter->rx_mbuf_sz = MCLBYTES;
1120         else if (adapter->max_frame_size <= 4096)
1121                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1122         else if (adapter->max_frame_size <= 9216)
1123                 adapter->rx_mbuf_sz = MJUM9BYTES;
1124         else
1125                 adapter->rx_mbuf_sz = MJUM16BYTES;
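        /*
         * Example: a 9000-byte MTU gives max_frame_size = 9000 + 14
         * (Ethernet header) + 4 (CRC) = 9018 bytes, which falls in the
         * <= 9216 bucket above and therefore uses the 9k (MJUM9BYTES)
         * cluster zone.
         */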
1126
1127         /* Prepare receive descriptors and buffers */
1128         if (ixgbe_setup_receive_structures(adapter)) {
1129                 device_printf(dev,"Could not setup receive structures\n");
1130                 ixgbe_stop(adapter);
1131                 return;
1132         }
1133
1134         /* Configure RX settings */
1135         ixgbe_initialize_receive_units(adapter);
1136
1137         gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1138
1139         /* Enable Fan Failure Interrupt */
1140         gpie |= IXGBE_SDP1_GPIEN;
1141
1142         /* Add for Module detection */
1143         if (hw->mac.type == ixgbe_mac_82599EB)
1144                 gpie |= IXGBE_SDP2_GPIEN;
1145
1146         /* Thermal Failure Detection */
1147         if (hw->mac.type == ixgbe_mac_X540)
1148                 gpie |= IXGBE_SDP0_GPIEN;
1149
1150         if (adapter->msix > 1) {
1151                 /* Enable Enhanced MSIX mode */
1152                 gpie |= IXGBE_GPIE_MSIX_MODE;
1153                 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1154                     IXGBE_GPIE_OCD;
1155         }
1156         IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1157
1158         /* Set MTU size */
1159         if (ifp->if_mtu > ETHERMTU) {
1160                 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1161                 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1162                 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1163                 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1164         }
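        /*
         * Example: with a 9018-byte max_frame_size the write above
         * places 9018 in the MFS field (the upper 16 bits of MHADD,
         * per IXGBE_MHADD_MFS_SHIFT), so the MAC accepts frames up to
         * that size.
         */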
1165         
1166         /* Now enable all the queues */
1167
1168         for (int i = 0; i < adapter->num_queues; i++) {
1169                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1170                 txdctl |= IXGBE_TXDCTL_ENABLE;
1171                 /* Set WTHRESH to 8, burst writeback */
1172                 txdctl |= (8 << 16);
1173                 /*
1174                  * When the internal queue falls below PTHRESH (32),
1175                  * start prefetching as long as there are at least
1176                  * HTHRESH (1) buffers ready. The values are taken
1177                  * from the Intel linux driver 3.8.21.
1178                  * Prefetching enables tx line rate even with 1 queue.
1179                  */
1180                 txdctl |= (32 << 0) | (1 << 8);
1181                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1182         }
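        /*
         * Note on the TXDCTL writes above: PTHRESH occupies bits 6:0,
         * HTHRESH bits 14:8 and WTHRESH bits 22:16, so the values ORed
         * in above encode PTHRESH=32, HTHRESH=1 and WTHRESH=8.
         */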
1183
1184         for (int i = 0; i < adapter->num_queues; i++) {
1185                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1186                 if (hw->mac.type == ixgbe_mac_82598EB) {
1187                         /*
1188                         ** PTHRESH = 21
1189                         ** HTHRESH = 4
1190                         ** WTHRESH = 8
1191                         */
1192                         rxdctl &= ~0x3FFFFF;
1193                         rxdctl |= 0x080420;
1194                 }
1195                 rxdctl |= IXGBE_RXDCTL_ENABLE;
1196                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1197                 for (k = 0; k < 10; k++) {
1198                         if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1199                             IXGBE_RXDCTL_ENABLE)
1200                                 break;
1201                         else
1202                                 msec_delay(1);
1203                 }
1204                 wmb();
1205 #ifdef DEV_NETMAP
1206                 /*
1207                  * In netmap mode, we must preserve the buffers made
1208                  * available to userspace before the if_init()
1209                  * (this is true by default on the TX side, because
1210                  * init makes all buffers available to userspace).
1211                  *
1212                  * netmap_reset() and the device specific routines
1213                  * (e.g. ixgbe_setup_receive_rings()) map these
1214                  * buffers at the end of the NIC ring, so here we
1215                  * must set the RDT (tail) register to make sure
1216                  * they are not overwritten.
1217                  *
1218                  * In this driver the NIC ring starts at RDH = 0,
1219                  * RDT points to the last slot available for reception (?),
1220                  * so RDT = num_rx_desc - 1 means the whole ring is available.
1221                  */
1222                 if (ifp->if_capenable & IFCAP_NETMAP) {
1223                         struct netmap_adapter *na = NA(adapter->ifp);
1224                         struct netmap_kring *kring = &na->rx_rings[i];
1225                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1226
1227                         IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1228                 } else
1229 #endif /* DEV_NETMAP */
1230                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1231         }
1232
1233         /* Set up VLAN support and filter */
1234         ixgbe_setup_vlan_hw_support(adapter);
1235
1236         /* Enable Receive engine */
1237         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1238         if (hw->mac.type == ixgbe_mac_82598EB)
1239                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1240         rxctrl |= IXGBE_RXCTRL_RXEN;
1241         ixgbe_enable_rx_dma(hw, rxctrl);
1242
1243         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1244
1245         /* Set up MSI/X routing */
1246         if (ixgbe_enable_msix)  {
1247                 ixgbe_configure_ivars(adapter);
1248                 /* Set up auto-mask */
1249                 if (hw->mac.type == ixgbe_mac_82598EB)
1250                         IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1251                 else {
1252                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1253                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1254                 }
1255         } else {  /* Simple settings for Legacy/MSI */
1256                 ixgbe_set_ivar(adapter, 0, 0, 0);
1257                 ixgbe_set_ivar(adapter, 0, 0, 1);
1258                 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1259         }
1260
1261 #ifdef IXGBE_FDIR
1262         /* Init Flow director */
1263         if (hw->mac.type != ixgbe_mac_82598EB) {
1264                 u32 hdrm = 32 << fdir_pballoc;
1265
1266                 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1267                 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1268         }
1269 #endif
1270
1271         /*
1272         ** Check on any SFP devices that
1273         ** need to be kick-started
1274         */
1275         if (hw->phy.type == ixgbe_phy_none) {
1276                 int err = hw->phy.ops.identify(hw);
1277                 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1278                         device_printf(dev,
1279                             "Unsupported SFP+ module type was detected.\n");
1280                         return;
1281                 }
1282         }
1283
1284         /* Set moderation on the Link interrupt */
1285         IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1286
1287         /* Config/Enable Link */
1288         ixgbe_config_link(adapter);
1289
1290         /* Hardware Packet Buffer & Flow Control setup */
1291         {
1292                 u32 rxpb, frame, size, tmp;
1293
1294                 frame = adapter->max_frame_size;
1295
1296                 /* Calculate High Water */
1297                 if (hw->mac.type == ixgbe_mac_X540)
1298                         tmp = IXGBE_DV_X540(frame, frame);
1299                 else
1300                         tmp = IXGBE_DV(frame, frame);
1301                 size = IXGBE_BT2KB(tmp);
1302                 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1303                 hw->fc.high_water[0] = rxpb - size;
1304
1305                 /* Now calculate Low Water */
1306                 if (hw->mac.type == ixgbe_mac_X540)
1307                         tmp = IXGBE_LOW_DV_X540(frame);
1308                 else
1309                         tmp = IXGBE_LOW_DV(frame);
1310                 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1311                 
1312                 hw->fc.requested_mode = adapter->fc;
1313                 hw->fc.pause_time = IXGBE_FC_PAUSE;
1314                 hw->fc.send_xon = TRUE;
1315         }
1316         /* Initialize the FC settings */
1317         ixgbe_start_hw(hw);
1318
1319         /* And now turn on interrupts */
1320         ixgbe_enable_intr(adapter);
1321
1322         /* Now inform the stack we're ready */
1323         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1324
1325         return;
1326 }
1327
1328 static void
1329 ixgbe_init(void *arg)
1330 {
1331         struct adapter *adapter = arg;
1332
1333         IXGBE_CORE_LOCK(adapter);
1334         ixgbe_init_locked(adapter);
1335         IXGBE_CORE_UNLOCK(adapter);
1336         return;
1337 }
1338
1339
1340 /*
1341 **
1342 ** MSIX Interrupt Handlers and Tasklets
1343 **
1344 */
1345
1346 static inline void
1347 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1348 {
1349         struct ixgbe_hw *hw = &adapter->hw;
1350         u64     queue = ((u64)1 << vector);
1351         u32     mask;
1352
1353         if (hw->mac.type == ixgbe_mac_82598EB) {
1354                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1355                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1356         } else {
1357                 mask = (queue & 0xFFFFFFFF);
1358                 if (mask)
1359                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1360                 mask = (queue >> 32);
1361                 if (mask)
1362                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1363         }
1364 }
1365
1366 static inline void
1367 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1368 {
1369         struct ixgbe_hw *hw = &adapter->hw;
1370         u64     queue = ((u64)1 << vector);
1371         u32     mask;
1372
1373         if (hw->mac.type == ixgbe_mac_82598EB) {
1374                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1375                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1376         } else {
1377                 mask = (queue & 0xFFFFFFFF);
1378                 if (mask)
1379                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1380                 mask = (queue >> 32);
1381                 if (mask)
1382                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1383         }
1384 }
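Both helpers above split the 64-bit per-vector mask across the two extended mask registers on newer MACs; a short worked example of the arithmetic, with the vector number chosen purely for illustration:

/*
 * Example: vector 35 -> queue = 1ULL << 35.  The low 32 bits are zero,
 * so the EX(0) register is left alone, while (queue >> 32) = 0x8 sets
 * bit 3 of EIMS_EX(1) (enable) or EIMC_EX(1) (disable) for that vector.
 */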
1385
1386 static void
1387 ixgbe_handle_que(void *context, int pending)
1388 {
1389         struct ix_queue *que = context;
1390         struct adapter  *adapter = que->adapter;
1391         struct tx_ring  *txr = que->txr;
1392         struct ifnet    *ifp = adapter->ifp;
1393         bool            more;
1394
1395         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1396                 more = ixgbe_rxeof(que);
1397                 IXGBE_TX_LOCK(txr);
1398                 ixgbe_txeof(txr);
1399 #ifndef IXGBE_LEGACY_TX
1400                 if (!drbr_empty(ifp, txr->br))
1401                         ixgbe_mq_start_locked(ifp, txr);
1402 #else
1403                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1404                         ixgbe_start_locked(txr, ifp);
1405 #endif
1406                 IXGBE_TX_UNLOCK(txr);
1407         }
1408
1409         /* Reenable this interrupt */
1410         if (que->res != NULL)
1411                 ixgbe_enable_queue(adapter, que->msix);
1412         else
1413                 ixgbe_enable_intr(adapter);
1414         return;
1415 }
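/*
** Illustrative note (added commentary): if the queue owns a dedicated
** interrupt resource (MSI-X), only its own vector is re-armed above via
** ixgbe_enable_queue(); in the shared legacy/MSI case (que->res is NULL)
** the global interrupt mask is restored with ixgbe_enable_intr() instead.
*/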
1416
1417
1418 /*********************************************************************
1419  *
1420  *  Legacy Interrupt Service routine
1421  *
1422  **********************************************************************/
1423
1424 static void
1425 ixgbe_legacy_irq(void *arg)
1426 {
1427         struct ix_queue *que = arg;
1428         struct adapter  *adapter = que->adapter;
1429         struct ixgbe_hw *hw = &adapter->hw;
1430         struct ifnet    *ifp = adapter->ifp;
1431         struct          tx_ring *txr = adapter->tx_rings;
1432         bool            more;
1433         u32             reg_eicr;
1434
1435
1436         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1437
1438         ++que->irqs;
1439         if (reg_eicr == 0) {
1440                 ixgbe_enable_intr(adapter);
1441                 return;
1442         }
1443
1444         more = ixgbe_rxeof(que);
1445
1446         IXGBE_TX_LOCK(txr);
1447         ixgbe_txeof(txr);
1448 #ifdef IXGBE_LEGACY_TX
1449         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1450                 ixgbe_start_locked(txr, ifp);
1451 #else
1452         if (!drbr_empty(ifp, txr->br))
1453                 ixgbe_mq_start_locked(ifp, txr);
1454 #endif
1455         IXGBE_TX_UNLOCK(txr);
1456
1457         /* Check for fan failure */
1458         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1459             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1460                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1461                     "REPLACE IMMEDIATELY!!\n");
1462                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1463         }
1464
1465         /* Link status change */
1466         if (reg_eicr & IXGBE_EICR_LSC)
1467                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1468
1469         if (more)
1470                 taskqueue_enqueue(que->tq, &que->que_task);
1471         else
1472                 ixgbe_enable_intr(adapter);
1473         return;
1474 }
1475
1476
1477 /*********************************************************************
1478  *
1479  *  MSIX Queue Interrupt Service routine
1480  *
1481  **********************************************************************/
1482 void
1483 ixgbe_msix_que(void *arg)
1484 {
1485         struct ix_queue *que = arg;
1486         struct adapter  *adapter = que->adapter;
1487         struct ifnet    *ifp = adapter->ifp;
1488         struct tx_ring  *txr = que->txr;
1489         struct rx_ring  *rxr = que->rxr;
1490         bool            more;
1491         u32             newitr = 0;
1492
1493         /* Protect against spurious interrupts */
1494         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1495                 return;
1496
1497         ixgbe_disable_queue(adapter, que->msix);
1498         ++que->irqs;
1499
1500         more = ixgbe_rxeof(que);
1501
1502         IXGBE_TX_LOCK(txr);
1503         ixgbe_txeof(txr);
1504 #ifdef IXGBE_LEGACY_TX
1505         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1506                 ixgbe_start_locked(txr, ifp);
1507 #else
1508         if (!drbr_empty(ifp, txr->br))
1509                 ixgbe_mq_start_locked(ifp, txr);
1510 #endif
1511         IXGBE_TX_UNLOCK(txr);
1512
1513         /* Do AIM now? */
1514
1515         if (ixgbe_enable_aim == FALSE)
1516                 goto no_calc;
1517         /*
1518         ** Do Adaptive Interrupt Moderation:
1519         **  - Write out last calculated setting
1520         **  - Calculate based on average size over
1521         **    the last interval.
1522         */
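        /*
        ** Worked example (illustrative, not from the original source): with
        ** txr->bytes = 64000 and txr->packets = 100 the average frame is
        ** 640 bytes; adding 24 for framing/CRC overhead gives 664, which is
        ** under the 3000 cap, falls in the 300-1200 mid range and so is
        ** scaled to 664/3 = 221 before being saved as the next EITR value.
        */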
1523         if (que->eitr_setting)
1524                 IXGBE_WRITE_REG(&adapter->hw,
1525                     IXGBE_EITR(que->msix), que->eitr_setting);
1526  
1527         que->eitr_setting = 0;
1528
1529         /* Idle, do nothing */
1530         if ((txr->bytes == 0) && (rxr->bytes == 0))
1531                 goto no_calc;
1532                                 
1533         if ((txr->bytes) && (txr->packets))
1534                 newitr = txr->bytes/txr->packets;
1535         if ((rxr->bytes) && (rxr->packets))
1536                 newitr = max(newitr,
1537                     (rxr->bytes / rxr->packets));
1538         newitr += 24; /* account for hardware frame, crc */
1539
1540         /* set an upper boundary */
1541         newitr = min(newitr, 3000);
1542
1543         /* Be nice to the mid range */
1544         if ((newitr > 300) && (newitr < 1200))
1545                 newitr = (newitr / 3);
1546         else
1547                 newitr = (newitr / 2);
1548
1549         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1550                 newitr |= newitr << 16;
1551         else
1552                 newitr |= IXGBE_EITR_CNT_WDIS;
1553                  
1554         /* save for next interrupt */
1555         que->eitr_setting = newitr;
1556
1557         /* Reset state */
1558         txr->bytes = 0;
1559         txr->packets = 0;
1560         rxr->bytes = 0;
1561         rxr->packets = 0;
1562
1563 no_calc:
1564         if (more)
1565                 taskqueue_enqueue(que->tq, &que->que_task);
1566         else
1567                 ixgbe_enable_queue(adapter, que->msix);
1568         return;
1569 }
1570
1571
1572 static void
1573 ixgbe_msix_link(void *arg)
1574 {
1575         struct adapter  *adapter = arg;
1576         struct ixgbe_hw *hw = &adapter->hw;
1577         u32             reg_eicr;
1578
1579         ++adapter->link_irq;
1580
1581         /* First get the cause */
1582         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1583         /* Be sure the queue bits are not cleared */
1584         reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1585         /* Clear interrupt with write */
1586         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
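        /*
        ** Illustrative note (added commentary): EICR bits are cleared by
        ** writing a one to them, so masking out the RTX queue bits first
        ** keeps this "other causes" vector from acknowledging queue
        ** interrupts that belong to the per-queue MSI-X handlers.
        */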
1587
1588         /* Link status change */
1589         if (reg_eicr & IXGBE_EICR_LSC)
1590                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1591
1592         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1593 #ifdef IXGBE_FDIR
1594                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1595                         /* This is probably overkill :) */
1596                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1597                                 return;
1598                         /* Disable the interrupt */
1599                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1600                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1601                 } else
1602 #endif
1603                 if (reg_eicr & IXGBE_EICR_ECC) {
1604                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1605                             "Please Reboot!!\n");
1606                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1607                 } else
1608
1609                 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1610                         /* Clear the interrupt */
1611                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1612                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1613                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1614                         /* Clear the interrupt */
1615                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1616                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1617                 }
1618         } 
1619
1620         /* Check for fan failure */
1621         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1622             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1623                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1624                     "REPLACE IMMEDIATELY!!\n");
1625                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1626         }
1627
1628         /* Check for over temp condition */
1629         if ((hw->mac.type == ixgbe_mac_X540) &&
1630             (reg_eicr & IXGBE_EICR_TS)) {
1631                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1632                     "PHY IS SHUT DOWN!!\n");
1633                 device_printf(adapter->dev, "System shutdown required\n");
1634                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1635         }
1636
1637         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1638         return;
1639 }
1640
1641 /*********************************************************************
1642  *
1643  *  Media Ioctl callback
1644  *
1645  *  This routine is called whenever the user queries the status of
1646  *  the interface using ifconfig.
1647  *
1648  **********************************************************************/
1649 static void
1650 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1651 {
1652         struct adapter *adapter = ifp->if_softc;
1653
1654         INIT_DEBUGOUT("ixgbe_media_status: begin");
1655         IXGBE_CORE_LOCK(adapter);
1656         ixgbe_update_link_status(adapter);
1657
1658         ifmr->ifm_status = IFM_AVALID;
1659         ifmr->ifm_active = IFM_ETHER;
1660
1661         if (!adapter->link_active) {
1662                 IXGBE_CORE_UNLOCK(adapter);
1663                 return;
1664         }
1665
1666         ifmr->ifm_status |= IFM_ACTIVE;
1667
1668         switch (adapter->link_speed) {
1669                 case IXGBE_LINK_SPEED_100_FULL:
1670                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1671                         break;
1672                 case IXGBE_LINK_SPEED_1GB_FULL:
1673                         ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1674                         break;
1675                 case IXGBE_LINK_SPEED_10GB_FULL:
1676                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1677                         break;
1678         }
1679
1680         IXGBE_CORE_UNLOCK(adapter);
1681
1682         return;
1683 }
1684
1685 /*********************************************************************
1686  *
1687  *  Media Ioctl callback
1688  *
1689  *  This routine is called when the user changes speed/duplex using
1690  *  the media/mediaopt option with ifconfig.
1691  *
1692  **********************************************************************/
1693 static int
1694 ixgbe_media_change(struct ifnet * ifp)
1695 {
1696         struct adapter *adapter = ifp->if_softc;
1697         struct ifmedia *ifm = &adapter->media;
1698
1699         INIT_DEBUGOUT("ixgbe_media_change: begin");
1700
1701         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1702                 return (EINVAL);
1703
1704         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1705         case IFM_AUTO:
1706                 adapter->hw.phy.autoneg_advertised =
1707                     IXGBE_LINK_SPEED_100_FULL |
1708                     IXGBE_LINK_SPEED_1GB_FULL |
1709                     IXGBE_LINK_SPEED_10GB_FULL;
1710                 break;
1711         default:
1712                 device_printf(adapter->dev, "Only auto media type\n");
1713                 return (EINVAL);
1714         }
1715
1716         return (0);
1717 }
1718
1719 /*********************************************************************
1720  *
1721  *  This routine maps the mbufs to tx descriptors, allowing the
1722  *  TX engine to transmit the packets. 
1723  *      - return 0 on success, positive on failure
1724  *
1725  **********************************************************************/
1726
1727 static int
1728 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1729 {
1730         struct adapter  *adapter = txr->adapter;
1731         u32             olinfo_status = 0, cmd_type_len;
1732         int             i, j, error, nsegs;
1733         int             first;
1734         bool            remap = TRUE;
1735         struct mbuf     *m_head;
1736         bus_dma_segment_t segs[adapter->num_segs];
1737         bus_dmamap_t    map;
1738         struct ixgbe_tx_buf *txbuf;
1739         union ixgbe_adv_tx_desc *txd = NULL;
1740
1741         m_head = *m_headp;
1742
1743         /* Basic descriptor defines */
1744         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1745             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1746
1747         if (m_head->m_flags & M_VLANTAG)
1748                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1749
1750         /*
1751          * Important to capture the first descriptor
1752          * used because it will contain the index of
1753          * the one we tell the hardware to report back
1754          */
1755         first = txr->next_avail_desc;
1756         txbuf = &txr->tx_buffers[first];
1757         map = txbuf->map;
1758
1759         /*
1760          * Map the packet for DMA.
1761          */
1762 retry:
1763         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1764             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1765
1766         if (__predict_false(error)) {
1767                 struct mbuf *m;
1768
1769                 switch (error) {
1770                 case EFBIG:
1771                         /* Try it again? - one try */
1772                         if (remap == TRUE) {
1773                                 remap = FALSE;
1774                                 m = m_defrag(*m_headp, M_NOWAIT);
1775                                 if (m == NULL) {
1776                                         adapter->mbuf_defrag_failed++;
1777                                         m_freem(*m_headp);
1778                                         *m_headp = NULL;
1779                                         return (ENOBUFS);
1780                                 }
1781                                 *m_headp = m;
1782                                 goto retry;
1783                         } else
1784                                 return (error);
1785                 case ENOMEM:
1786                         txr->no_tx_dma_setup++;
1787                         return (error);
1788                 default:
1789                         txr->no_tx_dma_setup++;
1790                         m_freem(*m_headp);
1791                         *m_headp = NULL;
1792                         return (error);
1793                 }
1794         }
1795
1796         /* Make certain there are enough descriptors */
1797         if (nsegs > txr->tx_avail - 2) {
1798                 txr->no_desc_avail++;
1799                 bus_dmamap_unload(txr->txtag, map);
1800                 return (ENOBUFS);
1801         }
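        /*
        ** Illustrative note (added commentary): the "- 2" above leaves
        ** headroom beyond the nsegs data descriptors, presumably for the
        ** offload context descriptor set up below plus one spare slot.
        */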
1802         m_head = *m_headp;
1803
1804         /*
1805         ** Set up the appropriate offload context
1806         ** this will consume the first descriptor
1807         */
1808         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1809         if (__predict_false(error)) {
1810                 if (error == ENOBUFS)
1811                         *m_headp = NULL;
1812                 return (error);
1813         }
1814
1815 #ifdef IXGBE_FDIR
1816         /* Do the flow director magic */
1817         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1818                 ++txr->atr_count;
1819                 if (txr->atr_count >= atr_sample_rate) {
1820                         ixgbe_atr(txr, m_head);
1821                         txr->atr_count = 0;
1822                 }
1823         }
1824 #endif
1825
1826         i = txr->next_avail_desc;
1827         for (j = 0; j < nsegs; j++) {
1828                 bus_size_t seglen;
1829                 bus_addr_t segaddr;
1830
1831                 txbuf = &txr->tx_buffers[i];
1832                 txd = &txr->tx_base[i];
1833                 seglen = segs[j].ds_len;
1834                 segaddr = htole64(segs[j].ds_addr);
1835
1836                 txd->read.buffer_addr = segaddr;
1837                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1838                     cmd_type_len | seglen);
1839                 txd->read.olinfo_status = htole32(olinfo_status);
1840
1841                 if (++i == txr->num_desc)
1842                         i = 0;
1843         }
1844
1845         txd->read.cmd_type_len |=
1846             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1847         txr->tx_avail -= nsegs;
1848         txr->next_avail_desc = i;
1849
1850         txbuf->m_head = m_head;
1851         /*
1852         ** Here we swap the map so the last descriptor,
1853         ** which gets the completion interrupt has the
1854         ** real map, and the first descriptor gets the
1855         ** unused map from this descriptor.
1856         */
1857         txr->tx_buffers[first].map = txbuf->map;
1858         txbuf->map = map;
1859         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1860
1861         /* Set the EOP descriptor that will be marked done */
1862         txbuf = &txr->tx_buffers[first];
1863         txbuf->eop = txd;
1864
1865         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1866             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1867         /*
1868          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1869          * hardware that this frame is available to transmit.
1870          */
1871         ++txr->total_packets;
1872         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1873
1874         return (0);
1875
1876 }
1877
1878 static void
1879 ixgbe_set_promisc(struct adapter *adapter)
1880 {
1881         u_int32_t       reg_rctl;
1882         struct ifnet   *ifp = adapter->ifp;
1883         int             mcnt = 0;
1884
1885         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1886         reg_rctl &= (~IXGBE_FCTRL_UPE);
1887         if (ifp->if_flags & IFF_ALLMULTI)
1888                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1889         else {
1890                 struct  ifmultiaddr *ifma;
1891 #if __FreeBSD_version < 800000
1892                 IF_ADDR_LOCK(ifp);
1893 #else
1894                 if_maddr_rlock(ifp);
1895 #endif
1896                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1897                         if (ifma->ifma_addr->sa_family != AF_LINK)
1898                                 continue;
1899                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1900                                 break;
1901                         mcnt++;
1902                 }
1903 #if __FreeBSD_version < 800000
1904                 IF_ADDR_UNLOCK(ifp);
1905 #else
1906                 if_maddr_runlock(ifp);
1907 #endif
1908         }
1909         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1910                 reg_rctl &= (~IXGBE_FCTRL_MPE);
1911         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1912
1913         if (ifp->if_flags & IFF_PROMISC) {
1914                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1915                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1916         } else if (ifp->if_flags & IFF_ALLMULTI) {
1917                 reg_rctl |= IXGBE_FCTRL_MPE;
1918                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1919                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1920         }
1921         return;
1922 }
1923
1924
1925 /*********************************************************************
1926  *  Multicast Update
1927  *
1928  *  This routine is called whenever multicast address list is updated.
1929  *
1930  **********************************************************************/
1931 #define IXGBE_RAR_ENTRIES 16
1932
1933 static void
1934 ixgbe_set_multi(struct adapter *adapter)
1935 {
1936         u32     fctrl;
1937         u8      *mta;
1938         u8      *update_ptr;
1939         struct  ifmultiaddr *ifma;
1940         int     mcnt = 0;
1941         struct ifnet   *ifp = adapter->ifp;
1942
1943         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1944
1945         mta = adapter->mta;
1946         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1947             MAX_NUM_MULTICAST_ADDRESSES);
1948
1949 #if __FreeBSD_version < 800000
1950         IF_ADDR_LOCK(ifp);
1951 #else
1952         if_maddr_rlock(ifp);
1953 #endif
1954         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1955                 if (ifma->ifma_addr->sa_family != AF_LINK)
1956                         continue;
1957                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1958                         break;
1959                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1960                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1961                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1962                 mcnt++;
1963         }
1964 #if __FreeBSD_version < 800000
1965         IF_ADDR_UNLOCK(ifp);
1966 #else
1967         if_maddr_runlock(ifp);
1968 #endif
1969
1970         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1971         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1972         if (ifp->if_flags & IFF_PROMISC)
1973                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1974         else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
1975             ifp->if_flags & IFF_ALLMULTI) {
1976                 fctrl |= IXGBE_FCTRL_MPE;
1977                 fctrl &= ~IXGBE_FCTRL_UPE;
1978         } else
1979                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1980         
1981         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1982
1983         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
1984                 update_ptr = mta;
1985                 ixgbe_update_mc_addr_list(&adapter->hw,
1986                     update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1987         }
1988
1989         return;
1990 }
1991
1992 /*
1993  * This is an iterator function needed by the multicast shared
1994  * code. It simply feeds the shared code routine the addresses from
1995  * the array built by ixgbe_set_multi(), one at a time.
1996  */
1997 static u8 *
1998 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1999 {
2000         u8 *addr = *update_ptr;
2001         u8 *newptr;
2002         *vmdq = 0;
2003
2004         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2005         *update_ptr = newptr;
2006         return addr;
2007 }
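/*
** Illustrative note (added commentary): ixgbe_update_mc_addr_list() calls
** this iterator once per multicast entry, so each call hands back the
** current IXGBE_ETH_LENGTH_OF_ADDRESS (6-byte) address from the flat mta
** array and advances *update_ptr to the next one.
*/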
2008
2009
2010 /*********************************************************************
2011  *  Timer routine
2012  *
2013  *  This routine checks for link status, updates statistics,
2014  *  and runs the watchdog check.
2015  *
2016  **********************************************************************/
2017
2018 static void
2019 ixgbe_local_timer(void *arg)
2020 {
2021         struct adapter  *adapter = arg;
2022         device_t        dev = adapter->dev;
2023         struct ix_queue *que = adapter->queues;
2024         struct tx_ring  *txr = adapter->tx_rings;
2025         int             hung = 0, paused = 0;
2026
2027         mtx_assert(&adapter->core_mtx, MA_OWNED);
2028
2029         /* Check for pluggable optics */
2030         if (adapter->sfp_probe)
2031                 if (!ixgbe_sfp_probe(adapter))
2032                         goto out; /* Nothing to do */
2033
2034         ixgbe_update_link_status(adapter);
2035         ixgbe_update_stats_counters(adapter);
2036
2037         /*
2038          * If the interface has been paused
2039          * then don't do the watchdog check
2040          */
2041         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2042                 paused = 1;
2043
2044         /*
2045         ** Check the TX queues status
2046         **      - watchdog only if all queues show hung
2047         */          
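        /*
        ** Illustrative note (added commentary): a full watchdog reset only
        ** happens when every queue reports IXGBE_QUEUE_HUNG while TX is not
        ** paused by flow control; queues still marked WORKING simply have
        ** their deferred-start task re-queued.
        */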
2048         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2049                 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2050                     (paused == 0))
2051                         ++hung;
2052                 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2053                         taskqueue_enqueue(que->tq, &txr->txq_task);
2054         }
2055         /* Only truly watchdog if all queues show hung */
2056         if (hung == adapter->num_queues)
2057                 goto watchdog;
2058
2059 out:
2060         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2061         return;
2062
2063 watchdog:
2064         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2065         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2066             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2067             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2068         device_printf(dev,"TX(%d) desc avail = %d, "
2069             "Next TX to Clean = %d\n",
2070             txr->me, txr->tx_avail, txr->next_to_clean);
2071         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2072         adapter->watchdog_events++;
2073         ixgbe_init_locked(adapter);
2074 }
2075
2076 /*
2077 ** Note: this routine updates the OS on the link state
2078 **      the real check of the hardware only happens with
2079 **      a link interrupt.
2080 */
2081 static void
2082 ixgbe_update_link_status(struct adapter *adapter)
2083 {
2084         struct ifnet    *ifp = adapter->ifp;
2085         device_t dev = adapter->dev;
2086
2087
2088         if (adapter->link_up){ 
2089                 if (adapter->link_active == FALSE) {
2090                         if (bootverbose)
2091                                 device_printf(dev,"Link is up %d Gbps %s \n",
2092                                     ((adapter->link_speed == 128)? 10:1),
2093                                     "Full Duplex");
2094                         adapter->link_active = TRUE;
2095                         /* Update any Flow Control changes */
2096                         ixgbe_fc_enable(&adapter->hw);
2097                         if_link_state_change(ifp, LINK_STATE_UP);
2098                 }
2099         } else { /* Link down */
2100                 if (adapter->link_active == TRUE) {
2101                         if (bootverbose)
2102                                 device_printf(dev,"Link is Down\n");
2103                         if_link_state_change(ifp, LINK_STATE_DOWN);
2104                         adapter->link_active = FALSE;
2105                 }
2106         }
2107
2108         return;
2109 }
2110
2111
2112 /*********************************************************************
2113  *
2114  *  This routine disables all traffic on the adapter by issuing a
2115  *  global reset on the MAC and deallocates TX/RX buffers.
2116  *
2117  **********************************************************************/
2118
2119 static void
2120 ixgbe_stop(void *arg)
2121 {
2122         struct ifnet   *ifp;
2123         struct adapter *adapter = arg;
2124         struct ixgbe_hw *hw = &adapter->hw;
2125         ifp = adapter->ifp;
2126
2127         mtx_assert(&adapter->core_mtx, MA_OWNED);
2128
2129         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2130         ixgbe_disable_intr(adapter);
2131         callout_stop(&adapter->timer);
2132
2133         /* Let the stack know...*/
2134         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2135
2136         ixgbe_reset_hw(hw);
2137         hw->adapter_stopped = FALSE;
2138         ixgbe_stop_adapter(hw);
2139         if (hw->mac.type == ixgbe_mac_82599EB)
2140                 ixgbe_stop_mac_link_on_d3_82599(hw);
2141         /* Turn off the laser - noop with no optics */
2142         ixgbe_disable_tx_laser(hw);
2143
2144         /* Update the stack */
2145         adapter->link_up = FALSE;
2146         ixgbe_update_link_status(adapter);
2147
2148         /* reprogram the RAR[0] in case user changed it. */
2149         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2150
2151         return;
2152 }
2153
2154
2155 /*********************************************************************
2156  *
2157  *  Determine hardware revision.
2158  *
2159  **********************************************************************/
2160 static void
2161 ixgbe_identify_hardware(struct adapter *adapter)
2162 {
2163         device_t        dev = adapter->dev;
2164         struct ixgbe_hw *hw = &adapter->hw;
2165
2166         /* Save off the information about this board */
2167         hw->vendor_id = pci_get_vendor(dev);
2168         hw->device_id = pci_get_device(dev);
2169         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2170         hw->subsystem_vendor_id =
2171             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2172         hw->subsystem_device_id =
2173             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2174
2175         /* We need this here to set the num_segs below */
2176         ixgbe_set_mac_type(hw);
2177
2178         /* Pick up the 82599 and VF settings */
2179         if (hw->mac.type != ixgbe_mac_82598EB) {
2180                 hw->phy.smart_speed = ixgbe_smart_speed;
2181                 adapter->num_segs = IXGBE_82599_SCATTER;
2182         } else
2183                 adapter->num_segs = IXGBE_82598_SCATTER;
2184
2185         return;
2186 }
2187
2188 /*********************************************************************
2189  *
2190  *  Determine optic type
2191  *
2192  **********************************************************************/
2193 static void
2194 ixgbe_setup_optics(struct adapter *adapter)
2195 {
2196         struct ixgbe_hw *hw = &adapter->hw;
2197         int             layer;
2198
2199         layer = ixgbe_get_supported_physical_layer(hw);
2200
2201         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2202                 adapter->optics = IFM_10G_T;
2203                 return;
2204         }
2205
2206         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2207                 adapter->optics = IFM_1000_T;
2208                 return;
2209         }
2210
2211         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2212                 adapter->optics = IFM_1000_SX;
2213                 return;
2214         }
2215
2216         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2217             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2218                 adapter->optics = IFM_10G_LR;
2219                 return;
2220         }
2221
2222         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2223                 adapter->optics = IFM_10G_SR;
2224                 return;
2225         }
2226
2227         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2228                 adapter->optics = IFM_10G_TWINAX;
2229                 return;
2230         }
2231
2232         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2233             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2234                 adapter->optics = IFM_10G_CX4;
2235                 return;
2236         }
2237
2238         /* If we get here just set the default */
2239         adapter->optics = IFM_ETHER | IFM_AUTO;
2240         return;
2241 }
2242
2243 /*********************************************************************
2244  *
2245  *  Setup the Legacy or MSI Interrupt handler
2246  *
2247  **********************************************************************/
2248 static int
2249 ixgbe_allocate_legacy(struct adapter *adapter)
2250 {
2251         device_t        dev = adapter->dev;
2252         struct          ix_queue *que = adapter->queues;
2253 #ifndef IXGBE_LEGACY_TX
2254         struct tx_ring          *txr = adapter->tx_rings;
2255 #endif
2256         int             error, rid = 0;
2257
2258         /* MSI RID at 1 */
2259         if (adapter->msix == 1)
2260                 rid = 1;
2261
2262         /* We allocate a single interrupt resource */
2263         adapter->res = bus_alloc_resource_any(dev,
2264             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2265         if (adapter->res == NULL) {
2266                 device_printf(dev, "Unable to allocate bus resource: "
2267                     "interrupt\n");
2268                 return (ENXIO);
2269         }
2270
2271         /*
2272          * Try allocating a fast interrupt and the associated deferred
2273          * processing contexts.
2274          */
2275 #ifndef IXGBE_LEGACY_TX
2276         TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2277 #endif
2278         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2279         que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2280             taskqueue_thread_enqueue, &que->tq);
2281         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
2282             device_get_nameunit(adapter->dev));
2283
2284         /* Tasklets for Link, SFP and Multispeed Fiber */
2285         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2286         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2287         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2288 #ifdef IXGBE_FDIR
2289         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2290 #endif
2291         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2292             taskqueue_thread_enqueue, &adapter->tq);
2293         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2294             device_get_nameunit(adapter->dev));
2295
2296         if ((error = bus_setup_intr(dev, adapter->res,
2297             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
2298             que, &adapter->tag)) != 0) {
2299                 device_printf(dev, "Failed to register fast interrupt "
2300                     "handler: %d\n", error);
2301                 taskqueue_free(que->tq);
2302                 taskqueue_free(adapter->tq);
2303                 que->tq = NULL;
2304                 adapter->tq = NULL;
2305                 return (error);
2306         }
2307         /* For simplicity in the handlers */
2308         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2309
2310         return (0);
2311 }
2312
2313
2314 /*********************************************************************
2315  *
2316  *  Setup MSIX Interrupt resources and handlers 
2317  *
2318  **********************************************************************/
2319 static int
2320 ixgbe_allocate_msix(struct adapter *adapter)
2321 {
2322         device_t        dev = adapter->dev;
2323         struct          ix_queue *que = adapter->queues;
2324         struct          tx_ring *txr = adapter->tx_rings;
2325         int             error, rid, vector = 0;
2326
2327         for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2328                 rid = vector + 1;
2329                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2330                     RF_SHAREABLE | RF_ACTIVE);
2331                 if (que->res == NULL) {
2332                         device_printf(dev,"Unable to allocate"
2333                             " bus resource: que interrupt [%d]\n", vector);
2334                         return (ENXIO);
2335                 }
2336                 /* Set the handler function */
2337                 error = bus_setup_intr(dev, que->res,
2338                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2339                     ixgbe_msix_que, que, &que->tag);
2340                 if (error) {
2341                         que->res = NULL;
2342                         device_printf(dev, "Failed to register QUE handler");
2343                         return (error);
2344                 }
2345 #if __FreeBSD_version >= 800504
2346                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2347 #endif
2348                 que->msix = vector;
2349                 adapter->que_mask |= (u64)(1 << que->msix);
2350                 /*
2351                 ** Bind the msix vector, and thus the
2352                 ** ring to the corresponding cpu.
2353                 */
2354                 if (adapter->num_queues > 1)
2355                         bus_bind_intr(dev, que->res, i);
2356
2357 #ifndef IXGBE_LEGACY_TX
2358                 TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2359 #endif
2360                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2361                 que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2362                     taskqueue_thread_enqueue, &que->tq);
2363                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2364                     device_get_nameunit(adapter->dev));
2365         }
2366
2367         /* and Link */
2368         rid = vector + 1;
2369         adapter->res = bus_alloc_resource_any(dev,
2370             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2371         if (!adapter->res) {
2372                 device_printf(dev,"Unable to allocate"
2373             " bus resource: Link interrupt [%d]\n", rid);
2374                 return (ENXIO);
2375         }
2376         /* Set the link handler function */
2377         error = bus_setup_intr(dev, adapter->res,
2378             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2379             ixgbe_msix_link, adapter, &adapter->tag);
2380         if (error) {
2381                 adapter->res = NULL;
2382                 device_printf(dev, "Failed to register LINK handler");
2383                 return (error);
2384         }
2385 #if __FreeBSD_version >= 800504
2386         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2387 #endif
2388         adapter->linkvec = vector;
2389         /* Tasklets for Link, SFP and Multispeed Fiber */
2390         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2391         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2392         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2393 #ifdef IXGBE_FDIR
2394         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2395 #endif
2396         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2397             taskqueue_thread_enqueue, &adapter->tq);
2398         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2399             device_get_nameunit(adapter->dev));
2400
2401         return (0);
2402 }
2403
2404 /*
2405  * Setup Either MSI/X or MSI
2406  */
2407 static int
2408 ixgbe_setup_msix(struct adapter *adapter)
2409 {
2410         device_t dev = adapter->dev;
2411         int rid, want, queues, msgs;
2412
2413         /* Override by tuneable */
2414         if (ixgbe_enable_msix == 0)
2415                 goto msi;
2416
2417         /* First try MSI/X */
2418         rid = PCIR_BAR(MSIX_82598_BAR);
2419         adapter->msix_mem = bus_alloc_resource_any(dev,
2420             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2421         if (!adapter->msix_mem) {
2422                 rid += 4;       /* 82599 maps in higher BAR */
2423                 adapter->msix_mem = bus_alloc_resource_any(dev,
2424                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2425         }
2426         if (!adapter->msix_mem) {
2427                 /* May not be enabled */
2428                 device_printf(adapter->dev,
2429                     "Unable to map MSIX table \n");
2430                 goto msi;
2431         }
2432
2433         msgs = pci_msix_count(dev); 
2434         if (msgs == 0) { /* system has msix disabled */
2435                 bus_release_resource(dev, SYS_RES_MEMORY,
2436                     rid, adapter->msix_mem);
2437                 adapter->msix_mem = NULL;
2438                 goto msi;
2439         }
2440
2441         /* Figure out a reasonable auto config value */
2442         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2443
2444         if (ixgbe_num_queues != 0)
2445                 queues = ixgbe_num_queues;
2446         /* Set max queues to 8 when autoconfiguring */
2447         else if ((ixgbe_num_queues == 0) && (queues > 8))
2448                 queues = 8;
2449
2450         /*
2451         ** Want one vector (RX/TX pair) per queue
2452         ** plus an additional for Link.
2453         */
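        /*
        ** Worked example (illustrative): on an 8-core system advertising 16
        ** MSI-X messages, queues is clamped to 8 and want = 9, so 9 vectors
        ** are requested.  In the auto-configured case want never exceeds
        ** msgs; the shortfall warning below is hit when the ixgbe_num_queues
        ** tunable asks for more queues than there are messages, and the
        ** driver then falls back to the legacy/MSI setup path.
        */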
2454         want = queues + 1;
2455         if (msgs >= want)
2456                 msgs = want;
2457         else {
2458                 device_printf(adapter->dev,
2459                     "MSIX Configuration Problem, "
2460                     "%d vectors available but %d wanted!\n",
2461                     msgs, want);
2462                 return (0); /* Will go to Legacy setup */
2463         }
2464         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2465                 device_printf(adapter->dev,
2466                     "Using MSIX interrupts with %d vectors\n", msgs);
2467                 adapter->num_queues = queues;
2468                 return (msgs);
2469         }
2470 msi:
2471         msgs = pci_msi_count(dev);
2472         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2473                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2474         else
2475                 device_printf(adapter->dev,"Using a Legacy interrupt\n");
2476         return (msgs);
2477 }
2478
2479
2480 static int
2481 ixgbe_allocate_pci_resources(struct adapter *adapter)
2482 {
2483         int             rid;
2484         device_t        dev = adapter->dev;
2485
2486         rid = PCIR_BAR(0);
2487         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2488             &rid, RF_ACTIVE);
2489
2490         if (!(adapter->pci_mem)) {
2491                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2492                 return (ENXIO);
2493         }
2494
2495         adapter->osdep.mem_bus_space_tag =
2496                 rman_get_bustag(adapter->pci_mem);
2497         adapter->osdep.mem_bus_space_handle =
2498                 rman_get_bushandle(adapter->pci_mem);
2499         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2500
2501         /* Legacy defaults */
2502         adapter->num_queues = 1;
2503         adapter->hw.back = &adapter->osdep;
2504
2505         /*
2506         ** Now setup MSI or MSI/X, should
2507         ** return us the number of supported
2508         ** vectors. (Will be 1 for MSI)
2509         */
2510         adapter->msix = ixgbe_setup_msix(adapter);
2511         return (0);
2512 }
2513
2514 static void
2515 ixgbe_free_pci_resources(struct adapter * adapter)
2516 {
2517         struct          ix_queue *que = adapter->queues;
2518         device_t        dev = adapter->dev;
2519         int             rid, memrid;
2520
2521         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2522                 memrid = PCIR_BAR(MSIX_82598_BAR);
2523         else
2524                 memrid = PCIR_BAR(MSIX_82599_BAR);
2525
2526         /*
2527         ** There is a slight possibility of a failure mode
2528         ** in attach that will result in entering this function
2529         ** before interrupt resources have been initialized, and
2530         ** in that case we do not want to execute the loops below.
2531         ** We can detect this reliably by the state of the adapter
2532         ** res pointer.
2533         */
2534         if (adapter->res == NULL)
2535                 goto mem;
2536
2537         /*
2538         **  Release all msix queue resources:
2539         */
2540         for (int i = 0; i < adapter->num_queues; i++, que++) {
2541                 rid = que->msix + 1;
2542                 if (que->tag != NULL) {
2543                         bus_teardown_intr(dev, que->res, que->tag);
2544                         que->tag = NULL;
2545                 }
2546                 if (que->res != NULL)
2547                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2548         }
2549
2550
2551         /* Clean the Legacy or Link interrupt last */
2552         if (adapter->linkvec) /* we are doing MSIX */
2553                 rid = adapter->linkvec + 1;
2554         else
2555                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2556
2557         if (adapter->tag != NULL) {
2558                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2559                 adapter->tag = NULL;
2560         }
2561         if (adapter->res != NULL)
2562                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2563
2564 mem:
2565         if (adapter->msix)
2566                 pci_release_msi(dev);
2567
2568         if (adapter->msix_mem != NULL)
2569                 bus_release_resource(dev, SYS_RES_MEMORY,
2570                     memrid, adapter->msix_mem);
2571
2572         if (adapter->pci_mem != NULL)
2573                 bus_release_resource(dev, SYS_RES_MEMORY,
2574                     PCIR_BAR(0), adapter->pci_mem);
2575
2576         return;
2577 }
2578
2579 /*********************************************************************
2580  *
2581  *  Setup networking device structure and register an interface.
2582  *
2583  **********************************************************************/
2584 static int
2585 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2586 {
2587         struct ixgbe_hw *hw = &adapter->hw;
2588         struct ifnet   *ifp;
2589
2590         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2591
2592         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2593         if (ifp == NULL) {
2594                 device_printf(dev, "can not allocate ifnet structure\n");
2595                 return (-1);
2596         }
2597         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2598 #if __FreeBSD_version < 1000025
2599         ifp->if_baudrate = 1000000000;
2600 #else
2601         if_initbaudrate(ifp, IF_Gbps(10));
2602 #endif
2603         ifp->if_init = ixgbe_init;
2604         ifp->if_softc = adapter;
2605         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2606         ifp->if_ioctl = ixgbe_ioctl;
2607 #ifndef IXGBE_LEGACY_TX
2608         ifp->if_transmit = ixgbe_mq_start;
2609         ifp->if_qflush = ixgbe_qflush;
2610 #else
2611         ifp->if_start = ixgbe_start;
2612         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2613         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2614         IFQ_SET_READY(&ifp->if_snd);
2615 #endif
2616
2617         ether_ifattach(ifp, adapter->hw.mac.addr);
2618
2619         adapter->max_frame_size =
2620             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2621
2622         /*
2623          * Tell the upper layer(s) we support long frames.
2624          */
2625         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2626
2627         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2628         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2629         ifp->if_capabilities |= IFCAP_LRO;
2630         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2631                              |  IFCAP_VLAN_HWTSO
2632                              |  IFCAP_VLAN_MTU;
2633         ifp->if_capenable = ifp->if_capabilities;
2634
2635         /*
2636         ** Don't turn this on by default, if vlans are
2637         ** created on another pseudo device (eg. lagg)
2638         ** then vlan events are not passed thru, breaking
2639         ** operation, but with HW FILTER off it works. If
2640         ** using vlans directly on the ixgbe driver you can
2641         ** enable this and get full hardware tag filtering.
2642         */
2643         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2644
2645         /*
2646          * Specify the media types supported by this adapter and register
2647          * callbacks to update media and link information
2648          */
2649         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2650                      ixgbe_media_status);
2651         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2652         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2653         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2654                 ifmedia_add(&adapter->media,
2655                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2656                 ifmedia_add(&adapter->media,
2657                     IFM_ETHER | IFM_1000_T, 0, NULL);
2658         }
2659         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2660         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2661
2662         return (0);
2663 }
2664
2665 static void
2666 ixgbe_config_link(struct adapter *adapter)
2667 {
2668         struct ixgbe_hw *hw = &adapter->hw;
2669         u32     autoneg, err = 0;
2670         bool    sfp, negotiate;
2671
2672         sfp = ixgbe_is_sfp(hw);
2673
2674         if (sfp) { 
2675                 if (hw->phy.multispeed_fiber) {
2676                         hw->mac.ops.setup_sfp(hw);
2677                         ixgbe_enable_tx_laser(hw);
2678                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2679                 } else
2680                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2681         } else {
2682                 if (hw->mac.ops.check_link)
2683                         err = ixgbe_check_link(hw, &adapter->link_speed,
2684                             &adapter->link_up, FALSE);
2685                 if (err)
2686                         goto out;
2687                 autoneg = hw->phy.autoneg_advertised;
2688                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2689                         err  = hw->mac.ops.get_link_capabilities(hw,
2690                             &autoneg, &negotiate);
2691                 if (err)
2692                         goto out;
2693                 if (hw->mac.ops.setup_link)
2694                         err = hw->mac.ops.setup_link(hw,
2695                             autoneg, adapter->link_up);
2696         }
2697 out:
2698         return;
2699 }
2700
2701 /********************************************************************
2702  * Manage DMA'able memory.
2703  *******************************************************************/
2704 static void
2705 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2706 {
2707         if (error)
2708                 return;
2709         *(bus_addr_t *) arg = segs->ds_addr;
2710         return;
2711 }
2712
2713 static int
2714 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2715                 struct ixgbe_dma_alloc *dma, int mapflags)
2716 {
2717         device_t dev = adapter->dev;
2718         int             r;
2719
2720         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2721                                DBA_ALIGN, 0,    /* alignment, bounds */
2722                                BUS_SPACE_MAXADDR,       /* lowaddr */
2723                                BUS_SPACE_MAXADDR,       /* highaddr */
2724                                NULL, NULL,      /* filter, filterarg */
2725                                size,    /* maxsize */
2726                                1,       /* nsegments */
2727                                size,    /* maxsegsize */
2728                                BUS_DMA_ALLOCNOW,        /* flags */
2729                                NULL,    /* lockfunc */
2730                                NULL,    /* lockfuncarg */
2731                                &dma->dma_tag);
2732         if (r != 0) {
2733                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2734                        "error %u\n", r);
2735                 goto fail_0;
2736         }
2737         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2738                              BUS_DMA_NOWAIT, &dma->dma_map);
2739         if (r != 0) {
2740                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2741                        "error %u\n", r);
2742                 goto fail_1;
2743         }
2744         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2745                             size,
2746                             ixgbe_dmamap_cb,
2747                             &dma->dma_paddr,
2748                             mapflags | BUS_DMA_NOWAIT);
2749         if (r != 0) {
2750                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2751                        "error %u\n", r);
2752                 goto fail_2;
2753         }
2754         dma->dma_size = size;
2755         return (0);
2756 fail_2:
2757         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2758 fail_1:
2759         bus_dma_tag_destroy(dma->dma_tag);
2760 fail_0:
2761         dma->dma_map = NULL;
2762         dma->dma_tag = NULL;
2763         return (r);
2764 }
2765
2766 static void
2767 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2768 {
2769         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2770             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2771         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2772         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2773         bus_dma_tag_destroy(dma->dma_tag);
2774 }
2775
2776
2777 /*********************************************************************
2778  *
2779  *  Allocate memory for the transmit and receive rings, and then
2780  *  the descriptors associated with each, called only once at attach.
2781  *
2782  **********************************************************************/
2783 static int
2784 ixgbe_allocate_queues(struct adapter *adapter)
2785 {
2786         device_t        dev = adapter->dev;
2787         struct ix_queue *que;
2788         struct tx_ring  *txr;
2789         struct rx_ring  *rxr;
2790         int rsize, tsize, error = IXGBE_SUCCESS;
2791         int txconf = 0, rxconf = 0;
2792
2793         /* First allocate the top level queue structs */
2794         if (!(adapter->queues =
2795             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2796             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2797                 device_printf(dev, "Unable to allocate queue memory\n");
2798                 error = ENOMEM;
2799                 goto fail;
2800         }
2801
2802         /* First allocate the TX ring struct memory */
2803         if (!(adapter->tx_rings =
2804             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2805             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2806                 device_printf(dev, "Unable to allocate TX ring memory\n");
2807                 error = ENOMEM;
2808                 goto tx_fail;
2809         }
2810
2811         /* Next allocate the RX */
2812         if (!(adapter->rx_rings =
2813             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2814             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2815                 device_printf(dev, "Unable to allocate RX ring memory\n");
2816                 error = ENOMEM;
2817                 goto rx_fail;
2818         }
2819
2820         /* For the ring itself */
2821         tsize = roundup2(adapter->num_tx_desc *
2822             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2823
2824         /*
2825          * Now set up the TX queues, txconf is needed to handle the
2826          * possibility that things fail midcourse and we need to
2827          * undo memory gracefully
2828          */ 
2829         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2830                 /* Set up some basics */
2831                 txr = &adapter->tx_rings[i];
2832                 txr->adapter = adapter;
2833                 txr->me = i;
2834                 txr->num_desc = adapter->num_tx_desc;
2835
2836                 /* Initialize the TX side lock */
2837                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2838                     device_get_nameunit(dev), txr->me);
2839                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2840
2841                 if (ixgbe_dma_malloc(adapter, tsize,
2842                         &txr->txdma, BUS_DMA_NOWAIT)) {
2843                         device_printf(dev,
2844                             "Unable to allocate TX Descriptor memory\n");
2845                         error = ENOMEM;
2846                         goto err_tx_desc;
2847                 }
2848                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2849                 bzero((void *)txr->tx_base, tsize);
2850
2851                 /* Now allocate transmit buffers for the ring */
2852                 if (ixgbe_allocate_transmit_buffers(txr)) {
2853                         device_printf(dev,
2854                             "Critical Failure setting up transmit buffers\n");
2855                         error = ENOMEM;
2856                         goto err_tx_desc;
2857                 }
2858 #ifndef IXGBE_LEGACY_TX
2859                 /* Allocate a buf ring */
2860                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2861                     M_WAITOK, &txr->tx_mtx);
2862                 if (txr->br == NULL) {
2863                         device_printf(dev,
2864                             "Critical Failure setting up buf ring\n");
2865                         error = ENOMEM;
2866                         goto err_tx_desc;
2867                 }
2868 #endif
2869         }
2870
2871         /*
2872          * Next the RX queues...
2873          */ 
2874         rsize = roundup2(adapter->num_rx_desc *
2875             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2876         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2877                 rxr = &adapter->rx_rings[i];
2878                 /* Set up some basics */
2879                 rxr->adapter = adapter;
2880                 rxr->me = i;
2881                 rxr->num_desc = adapter->num_rx_desc;
2882
2883                 /* Initialize the RX side lock */
2884                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2885                     device_get_nameunit(dev), rxr->me);
2886                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2887
2888                 if (ixgbe_dma_malloc(adapter, rsize,
2889                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2890                         device_printf(dev,
2891                             "Unable to allocate RX Descriptor memory\n");
2892                         error = ENOMEM;
2893                         goto err_rx_desc;
2894                 }
2895                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2896                 bzero((void *)rxr->rx_base, rsize);
2897
2898                 /* Allocate receive buffers for the ring*/
2899                 if (ixgbe_allocate_receive_buffers(rxr)) {
2900                         device_printf(dev,
2901                             "Critical Failure setting up receive buffers\n");
2902                         error = ENOMEM;
2903                         goto err_rx_desc;
2904                 }
2905         }
2906
2907         /*
2908         ** Finally set up the queue holding structs
2909         */
2910         for (int i = 0; i < adapter->num_queues; i++) {
2911                 que = &adapter->queues[i];
2912                 que->adapter = adapter;
2913                 que->txr = &adapter->tx_rings[i];
2914                 que->rxr = &adapter->rx_rings[i];
2915         }
2916
2917         return (0);
2918
2919 err_rx_desc:
2920         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2921                 ixgbe_dma_free(adapter, &rxr->rxdma);
2922 err_tx_desc:
2923         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2924                 ixgbe_dma_free(adapter, &txr->txdma);
2925         free(adapter->rx_rings, M_DEVBUF);
2926 rx_fail:
2927         free(adapter->tx_rings, M_DEVBUF);
2928 tx_fail:
2929         free(adapter->queues, M_DEVBUF);
2930 fail:
2931         return (error);
2932 }
2933
2934 /*********************************************************************
2935  *
2936  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2937  *  the information needed to transmit a packet on the wire. This is
2938  *  called only once at attach, setup is done every reset.
2939  *
2940  **********************************************************************/
2941 static int
2942 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2943 {
2944         struct adapter *adapter = txr->adapter;
2945         device_t dev = adapter->dev;
2946         struct ixgbe_tx_buf *txbuf;
2947         int error, i;
2948
2949         /*
2950          * Setup DMA descriptor areas.
2951          */
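        /*
         * The tag below lets one packet map into at most adapter->num_segs
         * segments of up to PAGE_SIZE each, with IXGBE_TSO_SIZE as the
         * total per-mapping limit.
         */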
2952         if ((error = bus_dma_tag_create(
2953                                bus_get_dma_tag(adapter->dev),   /* parent */
2954                                1, 0,            /* alignment, bounds */
2955                                BUS_SPACE_MAXADDR,       /* lowaddr */
2956                                BUS_SPACE_MAXADDR,       /* highaddr */
2957                                NULL, NULL,              /* filter, filterarg */
2958                                IXGBE_TSO_SIZE,          /* maxsize */
2959                                adapter->num_segs,       /* nsegments */
2960                                PAGE_SIZE,               /* maxsegsize */
2961                                0,                       /* flags */
2962                                NULL,                    /* lockfunc */
2963                                NULL,                    /* lockfuncarg */
2964                                &txr->txtag))) {
2965                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2966                 goto fail;
2967         }
2968
2969         if (!(txr->tx_buffers =
2970             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
2971             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2972                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2973                 error = ENOMEM;
2974                 goto fail;
2975         }
2976
2977         /* Create the descriptor buffer dma maps */
2978         txbuf = txr->tx_buffers;
2979         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2980                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2981                 if (error != 0) {
2982                         device_printf(dev, "Unable to create TX DMA map\n");
2983                         goto fail;
2984                 }
2985         }
2986
2987         return 0;
2988 fail:
2989         /* We free all, it handles case where we are in the middle */
2990         ixgbe_free_transmit_structures(adapter);
2991         return (error);
2992 }
2993
2994 /*********************************************************************
2995  *
2996  *  Initialize a transmit ring.
2997  *
2998  **********************************************************************/
2999 static void
3000 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3001 {
3002         struct adapter *adapter = txr->adapter;
3003         struct ixgbe_tx_buf *txbuf;
3004         int i;
3005 #ifdef DEV_NETMAP
3006         struct netmap_adapter *na = NA(adapter->ifp);
3007         struct netmap_slot *slot;
3008 #endif /* DEV_NETMAP */
3009
3010         /* Clear the old ring contents */
3011         IXGBE_TX_LOCK(txr);
3012 #ifdef DEV_NETMAP
3013         /*
3014          * (under lock): if in netmap mode, do some consistency
3015          * checks and set slot to entry 0 of the netmap ring.
3016          */
3017         slot = netmap_reset(na, NR_TX, txr->me, 0);
3018 #endif /* DEV_NETMAP */
3019         bzero((void *)txr->tx_base,
3020               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3021         /* Reset indices */
3022         txr->next_avail_desc = 0;
3023         txr->next_to_clean = 0;
3024
3025         /* Free any existing tx buffers. */
3026         txbuf = txr->tx_buffers;
3027         for (i = 0; i < txr->num_desc; i++, txbuf++) {
3028                 if (txbuf->m_head != NULL) {
3029                         bus_dmamap_sync(txr->txtag, txbuf->map,
3030                             BUS_DMASYNC_POSTWRITE);
3031                         bus_dmamap_unload(txr->txtag, txbuf->map);
3032                         m_freem(txbuf->m_head);
3033                         txbuf->m_head = NULL;
3034                 }
3035 #ifdef DEV_NETMAP
3036                 /*
3037                  * In netmap mode, set the map for the packet buffer.
3038                  * NOTE: Some drivers (not this one) also need to set
3039                  * the physical buffer address in the NIC ring.
3040                  * Slots in the netmap ring (indexed by "si") are
3041                  * kring->nkr_hwofs positions "ahead" wrt the
3042                  * corresponding slot in the NIC ring. In some drivers
3043                  * (not here) nkr_hwofs can be negative. Function
3044                  * netmap_idx_n2k() handles wraparounds properly.
3045                  */
3046                 if (slot) {
3047                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3048                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3049                 }
3050 #endif /* DEV_NETMAP */
3051                 /* Clear the EOP descriptor pointer */
3052                 txbuf->eop = NULL;
3053         }
3054
3055 #ifdef IXGBE_FDIR
3056         /* Set the rate at which we sample packets */
3057         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3058                 txr->atr_sample = atr_sample_rate;
3059 #endif
3060
3061         /* Set number of descriptors available */
3062         txr->tx_avail = adapter->num_tx_desc;
3063
3064         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3065             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3066         IXGBE_TX_UNLOCK(txr);
3067 }
3068
3069 /*********************************************************************
3070  *
3071  *  Initialize all transmit rings.
3072  *
3073  **********************************************************************/
3074 static int
3075 ixgbe_setup_transmit_structures(struct adapter *adapter)
3076 {
3077         struct tx_ring *txr = adapter->tx_rings;
3078
3079         for (int i = 0; i < adapter->num_queues; i++, txr++)
3080                 ixgbe_setup_transmit_ring(txr);
3081
3082         return (0);
3083 }
3084
3085 /*********************************************************************
3086  *
3087  *  Enable transmit unit.
3088  *
3089  **********************************************************************/
3090 static void
3091 ixgbe_initialize_transmit_units(struct adapter *adapter)
3092 {
3093         struct tx_ring  *txr = adapter->tx_rings;
3094         struct ixgbe_hw *hw = &adapter->hw;
3095
3096         /* Setup the Base and Length of the Tx Descriptor Ring */
3097
3098         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3099                 u64     tdba = txr->txdma.dma_paddr;
3100                 u32     txctrl;
3101
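                /*
                 * TDBAL/TDBAH take the low and high 32 bits of the ring's
                 * physical base address; TDLEN is the ring size in bytes.
                 */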
3102                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3103                        (tdba & 0x00000000ffffffffULL));
3104                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3105                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3106                     adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3107
3108                 /* Setup the HW Tx Head and Tail descriptor pointers */
3109                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3110                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3111
3112                 /* Setup Transmit Descriptor Cmd Settings */
3113                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3114                 txr->queue_status = IXGBE_QUEUE_IDLE;
3115
3116                 /* Set the processing limit */
3117                 txr->process_limit = ixgbe_tx_process_limit;
3118
3119                 /* Disable Head Writeback */
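                /*
                 * The TXCTRL register sits at a different offset on
                 * 82599/X540 than on 82598, so the read and the write
                 * back each need their own MAC-type switch.
                 */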
3120                 switch (hw->mac.type) {
3121                 case ixgbe_mac_82598EB:
3122                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3123                         break;
3124                 case ixgbe_mac_82599EB:
3125                 case ixgbe_mac_X540:
3126                 default:
3127                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3128                         break;
3129                 }
3130                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3131                 switch (hw->mac.type) {
3132                 case ixgbe_mac_82598EB:
3133                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3134                         break;
3135                 case ixgbe_mac_82599EB:
3136                 case ixgbe_mac_X540:
3137                 default:
3138                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3139                         break;
3140                 }
3141
3142         }
3143
3144         if (hw->mac.type != ixgbe_mac_82598EB) {
3145                 u32 dmatxctl, rttdcs;
3146                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3147                 dmatxctl |= IXGBE_DMATXCTL_TE;
3148                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3149                 /* Disable arbiter to set MTQC */
3150                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3151                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3152                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3153                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3154                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3155                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3156         }
3157
3158         return;
3159 }
3160
3161 /*********************************************************************
3162  *
3163  *  Free all transmit rings.
3164  *
3165  **********************************************************************/
3166 static void
3167 ixgbe_free_transmit_structures(struct adapter *adapter)
3168 {
3169         struct tx_ring *txr = adapter->tx_rings;
3170
3171         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3172                 IXGBE_TX_LOCK(txr);
3173                 ixgbe_free_transmit_buffers(txr);
3174                 ixgbe_dma_free(adapter, &txr->txdma);
3175                 IXGBE_TX_UNLOCK(txr);
3176                 IXGBE_TX_LOCK_DESTROY(txr);
3177         }
3178         free(adapter->tx_rings, M_DEVBUF);
3179 }
3180
3181 /*********************************************************************
3182  *
3183  *  Free transmit ring related data structures.
3184  *
3185  **********************************************************************/
3186 static void
3187 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3188 {
3189         struct adapter *adapter = txr->adapter;
3190         struct ixgbe_tx_buf *tx_buffer;
3191         int             i;
3192
3193         INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
3194
3195         if (txr->tx_buffers == NULL)
3196                 return;
3197
3198         tx_buffer = txr->tx_buffers;
3199         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3200                 if (tx_buffer->m_head != NULL) {
3201                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3202                             BUS_DMASYNC_POSTWRITE);
3203                         bus_dmamap_unload(txr->txtag,
3204                             tx_buffer->map);
3205                         m_freem(tx_buffer->m_head);
3206                         tx_buffer->m_head = NULL;
3207                         if (tx_buffer->map != NULL) {
3208                                 bus_dmamap_destroy(txr->txtag,
3209                                     tx_buffer->map);
3210                                 tx_buffer->map = NULL;
3211                         }
3212                 } else if (tx_buffer->map != NULL) {
3213                         bus_dmamap_unload(txr->txtag,
3214                             tx_buffer->map);
3215                         bus_dmamap_destroy(txr->txtag,
3216                             tx_buffer->map);
3217                         tx_buffer->map = NULL;
3218                 }
3219         }
3220 #ifndef IXGBE_LEGACY_TX
3221         if (txr->br != NULL)
3222                 buf_ring_free(txr->br, M_DEVBUF);
3223 #endif
3224         if (txr->tx_buffers != NULL) {
3225                 free(txr->tx_buffers, M_DEVBUF);
3226                 txr->tx_buffers = NULL;
3227         }
3228         if (txr->txtag != NULL) {
3229                 bus_dma_tag_destroy(txr->txtag);
3230                 txr->txtag = NULL;
3231         }
3232         return;
3233 }
3234
3235 /*********************************************************************
3236  *
3237  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3238  *
3239  **********************************************************************/
3240
3241 static int
3242 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3243     u32 *cmd_type_len, u32 *olinfo_status)
3244 {
3245         struct ixgbe_adv_tx_context_desc *TXD;
3246         struct ether_vlan_header *eh;
3247         struct ip *ip;
3248         struct ip6_hdr *ip6;
3249         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3250         int     ehdrlen, ip_hlen = 0;
3251         u16     etype;
3252         u8      ipproto = 0;
3253         int     offload = TRUE;
3254         int     ctxd = txr->next_avail_desc;
3255         u16     vtag = 0;
3256
3257         /* First check if TSO is to be used */
3258         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3259                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3260
3261         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3262                 offload = FALSE;
3263
3264         /* Indicate the whole packet as payload when not doing TSO */
3265         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3266
3267         /* Now ready a context descriptor */
3268         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3269
3270         /*
3271         ** In advanced descriptors the vlan tag must 
3272         ** be placed into the context descriptor. Hence
3273         ** we need to make one even if not doing offloads.
3274         */
3275         if (mp->m_flags & M_VLANTAG) {
3276                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3277                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3278         } else if (offload == FALSE) /* ... no offload to do */
3279                 return (0);
3280
3281         /*
3282          * Determine where frame payload starts.
3283          * Jump over vlan headers if already present,
3284          * helpful for QinQ too.
3285          */
3286         eh = mtod(mp, struct ether_vlan_header *);
3287         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3288                 etype = ntohs(eh->evl_proto);
3289                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3290         } else {
3291                 etype = ntohs(eh->evl_encap_proto);
3292                 ehdrlen = ETHER_HDR_LEN;
3293         }
3294
3295         /* Set the ether header length */
3296         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3297
3298         switch (etype) {
3299                 case ETHERTYPE_IP:
3300                         ip = (struct ip *)(mp->m_data + ehdrlen);
3301                         ip_hlen = ip->ip_hl << 2;
3302                         ipproto = ip->ip_p;
3303                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3304                         break;
3305                 case ETHERTYPE_IPV6:
3306                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3307                         ip_hlen = sizeof(struct ip6_hdr);
3308                         /* XXX-BZ this will go badly in case of ext hdrs. */
3309                         ipproto = ip6->ip6_nxt;
3310                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3311                         break;
3312                 default:
3313                         offload = FALSE;
3314                         break;
3315         }
3316
3317         vlan_macip_lens |= ip_hlen;
3318         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
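        /* DEXT plus DTYP_CTXT mark this as an advanced context descriptor. */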
3319
3320         switch (ipproto) {
3321                 case IPPROTO_TCP:
3322                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3323                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3324                         break;
3325
3326                 case IPPROTO_UDP:
3327                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3328                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3329                         break;
3330
3331 #if __FreeBSD_version >= 800000
3332                 case IPPROTO_SCTP:
3333                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3334                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3335                         break;
3336 #endif
3337                 default:
3338                         offload = FALSE;
3339                         break;
3340         }
3341
3342         if (offload) /* For the TX descriptor setup */
3343                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3344
3345         /* Now copy bits into descriptor */
3346         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3347         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3348         TXD->seqnum_seed = htole32(0);
3349         TXD->mss_l4len_idx = htole32(0);
3350
3351         /* We've consumed the first desc, adjust counters */
3352         if (++ctxd == txr->num_desc)
3353                 ctxd = 0;
3354         txr->next_avail_desc = ctxd;
3355         --txr->tx_avail;
3356
3357         return (0);
3358 }
3359
3360 /**********************************************************************
3361  *
3362  *  Setup work for hardware segmentation offload (TSO) on
3363  *  adapters using advanced tx descriptors
3364  *
3365  **********************************************************************/
3366 static int
3367 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3368     u32 *cmd_type_len, u32 *olinfo_status)
3369 {
3370         struct ixgbe_adv_tx_context_desc *TXD;
3371         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3372         u32 mss_l4len_idx = 0, paylen;
3373         u16 vtag = 0, eh_type;
3374         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3375         struct ether_vlan_header *eh;
3376 #ifdef INET6
3377         struct ip6_hdr *ip6;
3378 #endif
3379 #ifdef INET
3380         struct ip *ip;
3381 #endif
3382         struct tcphdr *th;
3383
3384
3385         /*
3386          * Determine where frame payload starts.
3387          * Jump over vlan headers if already present
3388          */
3389         eh = mtod(mp, struct ether_vlan_header *);
3390         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3391                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3392                 eh_type = eh->evl_proto;
3393         } else {
3394                 ehdrlen = ETHER_HDR_LEN;
3395                 eh_type = eh->evl_encap_proto;
3396         }
3397
3398         switch (ntohs(eh_type)) {
3399 #ifdef INET6
3400         case ETHERTYPE_IPV6:
3401                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3402                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3403                 if (ip6->ip6_nxt != IPPROTO_TCP)
3404                         return (ENXIO);
3405                 ip_hlen = sizeof(struct ip6_hdr);
3406                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3407                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3408                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3409                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3410                 break;
3411 #endif
3412 #ifdef INET
3413         case ETHERTYPE_IP:
3414                 ip = (struct ip *)(mp->m_data + ehdrlen);
3415                 if (ip->ip_p != IPPROTO_TCP)
3416                         return (ENXIO);
3417                 ip->ip_sum = 0;
3418                 ip_hlen = ip->ip_hl << 2;
3419                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3420                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3421                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3422                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3423                 /* Tell transmit desc to also do IPv4 checksum. */
3424                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3425                 break;
3426 #endif
3427         default:
3428                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3429                     __func__, ntohs(eh_type));
3430                 break;
3431         }
3432
3433         ctxd = txr->next_avail_desc;
3434         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3435
3436         tcp_hlen = th->th_off << 2;
3437
3438         /* This is used in the transmit desc in encap */
3439         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
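        /*
         * Only the TCP payload is counted here; the Ethernet, IP and TCP
         * headers are excluded because the hardware regenerates them for
         * every segment it produces from this frame.
         */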
3440
3441         /* VLAN MACLEN IPLEN */
3442         if (mp->m_flags & M_VLANTAG) {
3443                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3444                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3445         }
3446
3447         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3448         vlan_macip_lens |= ip_hlen;
3449         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3450
3451         /* ADV DTYPE TUCMD */
3452         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3453         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3454         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3455
3456         /* MSS L4LEN IDX */
3457         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3458         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3459         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3460
3461         TXD->seqnum_seed = htole32(0);
3462
3463         if (++ctxd == txr->num_desc)
3464                 ctxd = 0;
3465
3466         txr->tx_avail--;
3467         txr->next_avail_desc = ctxd;
3468         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3469         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3470         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3471         ++txr->tso_tx;
3472         return (0);
3473 }
3474
3475 #ifdef IXGBE_FDIR
3476 /*
3477 ** This routine parses packet headers so that Flow
3478 ** Director can make a hashed filter table entry 
3479 ** allowing traffic flows to be identified and kept
3480 ** on the same cpu.  This would be a performance
3481 ** hit, but we only do it at IXGBE_FDIR_RATE of
3482 ** packets.
3483 */
3484 static void
3485 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3486 {
3487         struct adapter                  *adapter = txr->adapter;
3488         struct ix_queue                 *que;
3489         struct ip                       *ip;
3490         struct tcphdr                   *th;
3491         struct udphdr                   *uh;
3492         struct ether_vlan_header        *eh;
3493         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3494         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3495         int                             ehdrlen, ip_hlen;
3496         u16                             etype;
3497
3498         eh = mtod(mp, struct ether_vlan_header *);
3499         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3500                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3501                 etype = eh->evl_proto;
3502         } else {
3503                 ehdrlen = ETHER_HDR_LEN;
3504                 etype = eh->evl_encap_proto;
3505         }
3506
3507         /* Only handling IPv4 */
3508         if (etype != htons(ETHERTYPE_IP))
3509                 return;
3510
3511         ip = (struct ip *)(mp->m_data + ehdrlen);
3512         ip_hlen = ip->ip_hl << 2;
3513
3514         /* check if we're UDP or TCP */
3515         switch (ip->ip_p) {
3516         case IPPROTO_TCP:
3517                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3518                 /* src and dst are inverted */
3519                 common.port.dst ^= th->th_sport;
3520                 common.port.src ^= th->th_dport;
3521                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3522                 break;
3523         case IPPROTO_UDP:
3524                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3525                 /* src and dst are inverted */
3526                 common.port.dst ^= uh->uh_sport;
3527                 common.port.src ^= uh->uh_dport;
3528                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3529                 break;
3530         default:
3531                 return;
3532         }
3533
3534         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3535         if (mp->m_pkthdr.ether_vtag)
3536                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3537         else
3538                 common.flex_bytes ^= etype;
3539         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3540
3541         que = &adapter->queues[txr->me];
3542         /*
3543         ** This assumes the Rx queue and Tx
3544         ** queue are bound to the same CPU
3545         */
3546         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3547             input, common, que->msix);
3548 }
3549 #endif /* IXGBE_FDIR */
3550
3551 /**********************************************************************
3552  *
3553  *  Examine each tx_buffer in the used queue. If the hardware is done
3554  *  processing the packet then free associated resources. The
3555  *  tx_buffer is put back on the free queue.
3556  *
3557  **********************************************************************/
3558 static void
3559 ixgbe_txeof(struct tx_ring *txr)
3560 {
3561         struct adapter          *adapter = txr->adapter;
3562         struct ifnet            *ifp = adapter->ifp;
3563         u32                     work, processed = 0;
3564         u16                     limit = txr->process_limit;
3565         struct ixgbe_tx_buf     *buf;
3566         union ixgbe_adv_tx_desc *txd;
3567
3568         mtx_assert(&txr->tx_mtx, MA_OWNED);
3569
3570 #ifdef DEV_NETMAP
3571         if (ifp->if_capenable & IFCAP_NETMAP) {
3572                 struct netmap_adapter *na = NA(ifp);
3573                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3574                 txd = txr->tx_base;
3575                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3576                     BUS_DMASYNC_POSTREAD);
3577                 /*
3578                  * In netmap mode, all the work is done in the context
3579                  * of the client thread. Interrupt handlers only wake up
3580                  * clients, which may be sleeping on individual rings
3581                  * or on a global resource for all rings.
3582                  * To implement tx interrupt mitigation, we wake up the client
3583                  * thread roughly every half ring, even if the NIC interrupts
3584                  * more frequently. This is implemented as follows:
3585                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3586                  *   the slot that should wake up the thread (nkr_num_slots
3587                  *   means the user thread should not be woken up);
3588                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3589                  *   or the slot has the DD bit set.
3590                  *
3591                  * When the driver has separate locks, we need to
3592                  * release and re-acquire txlock to avoid deadlocks.
3593                  * XXX see if we can find a better way.
3594                  */
3595                 if (!netmap_mitigate ||
3596                     (kring->nr_kflags < kring->nkr_num_slots &&
3597                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3598                         netmap_tx_irq(ifp, txr->me |
3599                             (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
3600                 }
3601                 return;
3602         }
3603 #endif /* DEV_NETMAP */
3604
3605         if (txr->tx_avail == txr->num_desc) {
3606                 txr->queue_status = IXGBE_QUEUE_IDLE;
3607                 return;
3608         }
3609
3610         /* Get work starting point */
3611         work = txr->next_to_clean;
3612         buf = &txr->tx_buffers[work];
3613         txd = &txr->tx_base[work];
3614         work -= txr->num_desc; /* The distance to ring end */
3615         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3616             BUS_DMASYNC_POSTREAD);
3617
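        /*
         * "work" is biased downward by num_desc so it reaches zero exactly
         * at the ring end (unsigned wraparound makes this safe); when it
         * hits zero the buffer and descriptor pointers wrap to the start
         * and the bias is reapplied. Adding num_desc back afterwards
         * recovers the new next_to_clean index.
         */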
3618         do {
3619                 union ixgbe_adv_tx_desc *eop= buf->eop;
3620                 if (eop == NULL) /* No work */
3621                         break;
3622
3623                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3624                         break;  /* I/O not complete */
3625
3626                 if (buf->m_head) {
3627                         txr->bytes +=
3628                             buf->m_head->m_pkthdr.len;
3629                         bus_dmamap_sync(txr->txtag,
3630                             buf->map,
3631                             BUS_DMASYNC_POSTWRITE);
3632                         bus_dmamap_unload(txr->txtag,
3633                             buf->map);
3634                         m_freem(buf->m_head);
3635                         buf->m_head = NULL;
3636                         buf->map = NULL;
3637                 }
3638                 buf->eop = NULL;
3639                 ++txr->tx_avail;
3640
3641                 /* We clean the range if multi segment */
3642                 while (txd != eop) {
3643                         ++txd;
3644                         ++buf;
3645                         ++work;
3646                         /* wrap the ring? */
3647                         if (__predict_false(!work)) {
3648                                 work -= txr->num_desc;
3649                                 buf = txr->tx_buffers;
3650                                 txd = txr->tx_base;
3651                         }
3652                         if (buf->m_head) {
3653                                 txr->bytes +=
3654                                     buf->m_head->m_pkthdr.len;
3655                                 bus_dmamap_sync(txr->txtag,
3656                                     buf->map,
3657                                     BUS_DMASYNC_POSTWRITE);
3658                                 bus_dmamap_unload(txr->txtag,
3659                                     buf->map);
3660                                 m_freem(buf->m_head);
3661                                 buf->m_head = NULL;
3662                                 buf->map = NULL;
3663                         }
3664                         ++txr->tx_avail;
3665                         buf->eop = NULL;
3666
3667                 }
3668                 ++txr->packets;
3669                 ++processed;
3670                 ++ifp->if_opackets;
3671                 txr->watchdog_time = ticks;
3672
3673                 /* Try the next packet */
3674                 ++txd;
3675                 ++buf;
3676                 ++work;
3677                 /* reset with a wrap */
3678                 if (__predict_false(!work)) {
3679                         work -= txr->num_desc;
3680                         buf = txr->tx_buffers;
3681                         txd = txr->tx_base;
3682                 }
3683                 prefetch(txd);
3684         } while (__predict_true(--limit));
3685
3686         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3687             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3688
3689         work += txr->num_desc;
3690         txr->next_to_clean = work;
3691
3692         /*
3693         ** Watchdog calculation, we know there's
3694         ** work outstanding or the first return
3695         ** would have been taken, so none processed
3696         ** for too long indicates a hang.
3697         */
3698         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3699                 txr->queue_status = IXGBE_QUEUE_HUNG;
3700
3701         if (txr->tx_avail == txr->num_desc)
3702                 txr->queue_status = IXGBE_QUEUE_IDLE;
3703
3704         return;
3705 }
3706
3707 /*********************************************************************
3708  *
3709  *  Refresh mbuf buffers for RX descriptor rings
3710  *   - now keeps its own state so discards due to resource
3711  *     exhaustion are unnecessary, if an mbuf cannot be obtained
3712  *     it just returns, keeping its placeholder, thus it can simply
3713  *     be recalled to try again.
3714  *
3715  **********************************************************************/
3716 static void
3717 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3718 {
3719         struct adapter          *adapter = rxr->adapter;
3720         bus_dma_segment_t       seg[1];
3721         struct ixgbe_rx_buf     *rxbuf;
3722         struct mbuf             *mp;
3723         int                     i, j, nsegs, error;
3724         bool                    refreshed = FALSE;
3725
3726         i = j = rxr->next_to_refresh;
3727         /* Control the loop with one beyond */
3728         if (++j == rxr->num_desc)
3729                 j = 0;
3730
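        /*
         * "i" is the slot being refreshed and "j" runs one ahead of it;
         * the loop stops once "j" reaches the caller-supplied limit, so
         * the slot at the limit itself is never touched.
         */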
3731         while (j != limit) {
3732                 rxbuf = &rxr->rx_buffers[i];
3733                 if (rxbuf->buf == NULL) {
3734                         mp = m_getjcl(M_NOWAIT, MT_DATA,
3735                             M_PKTHDR, rxr->mbuf_sz);
3736                         if (mp == NULL)
3737                                 goto update;
3738                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3739                                 m_adj(mp, ETHER_ALIGN);
3740                 } else
3741                         mp = rxbuf->buf;
3742
3743                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
3744
3745                 /* If we're dealing with an mbuf that was copied rather
3746                  * than replaced, there's no need to go through busdma.
3747                  */
3748                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
3749                         /* Get the memory mapping */
3750                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3751                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
3752                         if (error != 0) {
3753                                 printf("Refresh mbufs: payload dmamap load"
3754                                     " failure - %d\n", error);
3755                                 m_free(mp);
3756                                 rxbuf->buf = NULL;
3757                                 goto update;
3758                         }
3759                         rxbuf->buf = mp;
3760                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3761                             BUS_DMASYNC_PREREAD);
3762                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
3763                             htole64(seg[0].ds_addr);
3764                 } else {
3765                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
3766                         rxbuf->flags &= ~IXGBE_RX_COPY;
3767                 }
3768
3769                 refreshed = TRUE;
3770                 /* Next is precalculated */
3771                 i = j;
3772                 rxr->next_to_refresh = i;
3773                 if (++j == rxr->num_desc)
3774                         j = 0;
3775         }
3776 update:
3777         if (refreshed) /* Update hardware tail index */
3778                 IXGBE_WRITE_REG(&adapter->hw,
3779                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3780         return;
3781 }
3782
3783 /*********************************************************************
3784  *
3785  *  Allocate memory for rx_buffer structures. Since we use one
3786  *  rx_buffer per received packet, the maximum number of rx_buffer's
3787  *  that we'll need is equal to the number of receive descriptors
3788  *  that we've allocated.
3789  *
3790  **********************************************************************/
3791 static int
3792 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3793 {
3794         struct  adapter         *adapter = rxr->adapter;
3795         device_t                dev = adapter->dev;
3796         struct ixgbe_rx_buf     *rxbuf;
3797         int                     i, bsize, error;
3798
3799         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
3800         if (!(rxr->rx_buffers =
3801             (struct ixgbe_rx_buf *) malloc(bsize,
3802             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3803                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3804                 error = ENOMEM;
3805                 goto fail;
3806         }
3807
3808         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
3809                                    1, 0,        /* alignment, bounds */
3810                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3811                                    BUS_SPACE_MAXADDR,   /* highaddr */
3812                                    NULL, NULL,          /* filter, filterarg */
3813                                    MJUM16BYTES,         /* maxsize */
3814                                    1,                   /* nsegments */
3815                                    MJUM16BYTES,         /* maxsegsize */
3816                                    0,                   /* flags */
3817                                    NULL,                /* lockfunc */
3818                                    NULL,                /* lockfuncarg */
3819                                    &rxr->ptag))) {
3820                 device_printf(dev, "Unable to create RX DMA tag\n");
3821                 goto fail;
3822         }
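        /*
         * The payload tag maps a single segment of up to MJUM16BYTES:
         * each advanced RX descriptor points at exactly one receive buffer.
         */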
3823
3824         for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
3825                 rxbuf = &rxr->rx_buffers[i];
3826                 error = bus_dmamap_create(rxr->ptag,
3827                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3828                 if (error) {
3829                         device_printf(dev, "Unable to create RX dma map\n");
3830                         goto fail;
3831                 }
3832         }
3833
3834         return (0);
3835
3836 fail:
3837         /* Frees all, but can handle partial completion */
3838         ixgbe_free_receive_structures(adapter);
3839         return (error);
3840 }
3841
3842 /*
3843 ** Used to detect a descriptor that has
3844 ** been merged by Hardware RSC.
3845 */
3846 static inline u32
3847 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3848 {
3849         return (le32toh(rx->wb.lower.lo_dword.data) &
3850             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3851 }
3852
3853 /*********************************************************************
3854  *
3855  *  Initialize Hardware RSC (LRO) feature on 82599
3856  *  for an RX ring, this is toggled by the LRO capability
3857  *  even though it is transparent to the stack.
3858  *
3859  *  NOTE: since this HW feature only works with IPV4 and 
3860  *        our testing has shown soft LRO to be as effective
3861  *        I have decided to disable this by default.
3862  *
3863  **********************************************************************/
3864 static void
3865 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3866 {
3867         struct  adapter         *adapter = rxr->adapter;
3868         struct  ixgbe_hw        *hw = &adapter->hw;
3869         u32                     rscctrl, rdrxctl;
3870
3871         /* If turning LRO/RSC off we need to disable it */
3872         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
3873                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3874                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3875                 return;
3876         }
3877
3878         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3879         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3880 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3881         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3882 #endif /* DEV_NETMAP */
3883         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3884         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3885         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3886
3887         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3888         rscctrl |= IXGBE_RSCCTL_RSCEN;
3889         /*
3890         ** Limit the total number of descriptors that
3891         ** can be combined, so it does not exceed 64K
3892         */
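        /*
        ** The cap shrinks as the cluster size grows, so the merged
        ** length stays bounded regardless of the RX buffer size.
        */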
3893         if (rxr->mbuf_sz == MCLBYTES)
3894                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3895         else if (rxr->mbuf_sz == MJUMPAGESIZE)
3896                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3897         else if (rxr->mbuf_sz == MJUM9BYTES)
3898                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3899         else  /* Using 16K cluster */
3900                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3901
3902         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3903
3904         /* Enable TCP header recognition */
3905         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3906             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3907             IXGBE_PSRTYPE_TCPHDR));
3908
3909         /* Disable RSC for ACK packets */
3910         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3911             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3912
3913         rxr->hw_rsc = TRUE;
3914 }
3915
3916
3917 static void     
3918 ixgbe_free_receive_ring(struct rx_ring *rxr)
3919 {
3920         struct ixgbe_rx_buf       *rxbuf;
3921         int i;
3922
3923         for (i = 0; i < rxr->num_desc; i++) {
3924                 rxbuf = &rxr->rx_buffers[i];
3925                 if (rxbuf->buf != NULL) {
3926                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3927                             BUS_DMASYNC_POSTREAD);
3928                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3929                         rxbuf->buf->m_flags |= M_PKTHDR;
3930                         m_freem(rxbuf->buf);
3931                         rxbuf->buf = NULL;
3932                         rxbuf->flags = 0;
3933                 }
3934         }
3935 }
3936
3937
3938 /*********************************************************************
3939  *
3940  *  Initialize a receive ring and its buffers.
3941  *
3942  **********************************************************************/
3943 static int
3944 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3945 {
3946         struct  adapter         *adapter;
3947         struct ifnet            *ifp;
3948         device_t                dev;
3949         struct ixgbe_rx_buf     *rxbuf;
3950         bus_dma_segment_t       seg[1];
3951         struct lro_ctrl         *lro = &rxr->lro;
3952         int                     rsize, nsegs, error = 0;
3953 #ifdef DEV_NETMAP
3954         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3955         struct netmap_slot *slot;
3956 #endif /* DEV_NETMAP */
3957
3958         adapter = rxr->adapter;
3959         ifp = adapter->ifp;
3960         dev = adapter->dev;
3961
3962         /* Clear the ring contents */
3963         IXGBE_RX_LOCK(rxr);
3964 #ifdef DEV_NETMAP
3965         /* same as in ixgbe_setup_transmit_ring() */
3966         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3967 #endif /* DEV_NETMAP */
3968         rsize = roundup2(adapter->num_rx_desc *
3969             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3970         bzero((void *)rxr->rx_base, rsize);
3971         /* Cache the size */
3972         rxr->mbuf_sz = adapter->rx_mbuf_sz;
3973
3974         /* Free current RX buffer structs and their mbufs */
3975         ixgbe_free_receive_ring(rxr);
3976
3977         /* Now replenish the mbufs */
3978         for (int j = 0; j != rxr->num_desc; ++j) {
3979                 struct mbuf     *mp;
3980
3981                 rxbuf = &rxr->rx_buffers[j];
3982 #ifdef DEV_NETMAP
3983                 /*
3984                  * In netmap mode, fill the map and set the buffer
3985                  * address in the NIC ring, considering the offset
3986                  * between the netmap and NIC rings (see comment in
3987                  * ixgbe_setup_transmit_ring() ). No need to allocate
3988                  * an mbuf, so end the block with a continue;
3989                  */
3990                 if (slot) {
3991                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
3992                         uint64_t paddr;
3993                         void *addr;
3994
3995                         addr = PNMB(slot + sj, &paddr);
3996                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
3997                         /* Update descriptor and the cached value */
3998                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
3999                         rxbuf->addr = htole64(paddr);
4000                         continue;
4001                 }
4002 #endif /* DEV_NETMAP */
4003                 rxbuf->flags = 0; 
4004                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
4005                     M_PKTHDR, adapter->rx_mbuf_sz);
4006                 if (rxbuf->buf == NULL) {
4007                         error = ENOBUFS;
4008                         goto fail;
4009                 }
4010                 mp = rxbuf->buf;
4011                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4012                 /* Get the memory mapping */
4013                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4014                     rxbuf->pmap, mp, seg,
4015                     &nsegs, BUS_DMA_NOWAIT);
4016                 if (error != 0)
4017                         goto fail;
4018                 bus_dmamap_sync(rxr->ptag,
4019                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4020                 /* Update the descriptor and the cached value */
4021                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
4022                 rxbuf->addr = htole64(seg[0].ds_addr);
4023         }
4024
4025
4026         /* Setup our descriptor indices */
4027         rxr->next_to_check = 0;
4028         rxr->next_to_refresh = 0;
4029         rxr->lro_enabled = FALSE;
4030         rxr->rx_copies = 0;
4031         rxr->rx_bytes = 0;
4032         rxr->discard = FALSE;
4033         rxr->vtag_strip = FALSE;
4034
4035         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4036             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4037
4038         /*
4039         ** Now set up the LRO interface:
4040         */
4041         if (ixgbe_rsc_enable)
4042                 ixgbe_setup_hw_rsc(rxr);
4043         else if (ifp->if_capenable & IFCAP_LRO) {
4044                 int err = tcp_lro_init(lro);
4045                 if (err) {
4046                         device_printf(dev, "LRO Initialization failed!\n");
4047                         goto fail;
4048                 }
4049                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4050                 rxr->lro_enabled = TRUE;
4051                 lro->ifp = adapter->ifp;
4052         }
4053
4054         IXGBE_RX_UNLOCK(rxr);
4055         return (0);
4056
4057 fail:
4058         ixgbe_free_receive_ring(rxr);
4059         IXGBE_RX_UNLOCK(rxr);
4060         return (error);
4061 }
4062
4063 /*********************************************************************
4064  *
4065  *  Initialize all receive rings.
4066  *
4067  **********************************************************************/
4068 static int
4069 ixgbe_setup_receive_structures(struct adapter *adapter)
4070 {
4071         struct rx_ring *rxr = adapter->rx_rings;
4072         int j;
4073
4074         for (j = 0; j < adapter->num_queues; j++, rxr++)
4075                 if (ixgbe_setup_receive_ring(rxr))
4076                         goto fail;
4077
4078         return (0);
4079 fail:
4080         /*
4081          * Free the RX buffers allocated so far; only the rings that
4082          * completed need handling, since the failing ring cleaned up
4083          * after itself. 'j' failed, so it is the terminus.
4084          */
4085         for (int i = 0; i < j; ++i) {
4086                 rxr = &adapter->rx_rings[i];
4087                 ixgbe_free_receive_ring(rxr);
4088         }
4089
4090         return (ENOBUFS);
4091 }
4092
4093 /*********************************************************************
4094  *
4095  *  Setup receive registers and features.
4096  *
4097  **********************************************************************/
4098 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4099
4100 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4101         
4102 static void
4103 ixgbe_initialize_receive_units(struct adapter *adapter)
4104 {
4105         struct  rx_ring *rxr = adapter->rx_rings;
4106         struct ixgbe_hw *hw = &adapter->hw;
4107         struct ifnet   *ifp = adapter->ifp;
4108         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4109         u32             reta, mrqc = 0, hlreg, random[10];
4110
4111
4112         /*
4113          * Make sure receives are disabled while
4114          * setting up the descriptor ring
4115          */
4116         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4117         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4118             rxctrl & ~IXGBE_RXCTRL_RXEN);
4119
4120         /* Enable broadcasts */
4121         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4122         fctrl |= IXGBE_FCTRL_BAM;
4123         fctrl |= IXGBE_FCTRL_DPF;
4124         fctrl |= IXGBE_FCTRL_PMCF;
4125         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4126
4127         /* Set for Jumbo Frames? */
4128         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4129         if (ifp->if_mtu > ETHERMTU)
4130                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4131         else
4132                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4133 #ifdef DEV_NETMAP
4134         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4135         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4136                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4137         else
4138                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4139 #endif /* DEV_NETMAP */
4140         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4141
4142         bufsz = (adapter->rx_mbuf_sz +
4143             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4144
4145         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4146                 u64 rdba = rxr->rxdma.dma_paddr;
4147
4148                 /* Setup the Base and Length of the Rx Descriptor Ring */
4149                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4150                                (rdba & 0x00000000ffffffffULL));
4151                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4152                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4153                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4154
4155                 /* Set up the SRRCTL register */
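                /*
                 * SRRCTL carries the per-queue packet buffer size (bufsz,
                 * scaled by IXGBE_SRRCTL_BSIZEPKT_SHIFT above) and selects
                 * the one-buffer advanced descriptor layout.
                 */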
4156                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4157                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4158                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4159                 srrctl |= bufsz;
4160                 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4161                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4162
4163                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4164                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4165                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4166
4167                 /* Set the processing limit */
4168                 rxr->process_limit = ixgbe_rx_process_limit;
4169         }
4170
4171         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4172                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4173                               IXGBE_PSRTYPE_UDPHDR |
4174                               IXGBE_PSRTYPE_IPV4HDR |
4175                               IXGBE_PSRTYPE_IPV6HDR;
4176                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4177         }
4178
4179         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4180
4181         /* Setup RSS */
4182         if (adapter->num_queues > 1) {
4183                 int i, j;
4184                 reta = 0;
4185
4186                 /* set up random bits */
4187                 arc4rand(&random, sizeof(random), 0);
4188
4189                 /* Set up the redirection table */
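                /*
                 * Each 32-bit RETA register holds four one-byte entries;
                 * one entry is shifted in per pass and the register is
                 * written once every fourth iteration ((i & 3) == 3).
                 */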
4190                 for (i = 0, j = 0; i < 128; i++, j++) {
4191                         if (j == adapter->num_queues) j = 0;
4192                         reta = (reta << 8) | (j * 0x11);
4193                         if ((i & 3) == 3)
4194                                 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4195                 }
4196
4197                 /* Now fill our hash function seeds */
4198                 for (int i = 0; i < 10; i++)
4199                         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4200
4201                 /* Perform hash on these packet types */
4202                 mrqc = IXGBE_MRQC_RSSEN
4203                      | IXGBE_MRQC_RSS_FIELD_IPV4
4204                      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4205                      | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4206                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4207                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4208                      | IXGBE_MRQC_RSS_FIELD_IPV6
4209                      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4210                      | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4211                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4212                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4213
4214                 /* RSS and RX IPP Checksum are mutually exclusive */
4215                 rxcsum |= IXGBE_RXCSUM_PCSD;
4216         }
4217
4218         if (ifp->if_capenable & IFCAP_RXCSUM)
4219                 rxcsum |= IXGBE_RXCSUM_PCSD;
4220
4221         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4222                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4223
4224         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4225
4226         return;
4227 }
4228
4229 /*********************************************************************
4230  *
4231  *  Free all receive rings.
4232  *
4233  **********************************************************************/
4234 static void
4235 ixgbe_free_receive_structures(struct adapter *adapter)
4236 {
4237         struct rx_ring *rxr = adapter->rx_rings;
4238
4239         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4240
4241         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4242                 struct lro_ctrl         *lro = &rxr->lro;
4243                 ixgbe_free_receive_buffers(rxr);
4244                 /* Free LRO memory */
4245                 tcp_lro_free(lro);
4246                 /* Free the ring memory as well */
4247                 ixgbe_dma_free(adapter, &rxr->rxdma);
4248         }
4249
4250         free(adapter->rx_rings, M_DEVBUF);
4251 }
4252
4253
4254 /*********************************************************************
4255  *
4256  *  Free receive ring data structures
4257  *
4258  **********************************************************************/
4259 static void
4260 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4261 {
4262         struct adapter          *adapter = rxr->adapter;
4263         struct ixgbe_rx_buf     *rxbuf;
4264
4265         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4266
4267         /* Cleanup any existing buffers */
4268         if (rxr->rx_buffers != NULL) {
4269                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4270                         rxbuf = &rxr->rx_buffers[i];
4271                         if (rxbuf->buf != NULL) {
4272                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4273                                     BUS_DMASYNC_POSTREAD);
4274                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4275                                 rxbuf->buf->m_flags |= M_PKTHDR;
4276                                 m_freem(rxbuf->buf);
4277                         }
4278                         rxbuf->buf = NULL;
4279                         if (rxbuf->pmap != NULL) {
4280                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4281                                 rxbuf->pmap = NULL;
4282                         }
4283                 }
4284                 if (rxr->rx_buffers != NULL) {
4285                         free(rxr->rx_buffers, M_DEVBUF);
4286                         rxr->rx_buffers = NULL;
4287                 }
4288         }
4289
4290         if (rxr->ptag != NULL) {
4291                 bus_dma_tag_destroy(rxr->ptag);
4292                 rxr->ptag = NULL;
4293         }
4294
4295         return;
4296 }
4297
4298 static __inline void
4299 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4300 {
4301                  
4302         /*
4303          * At the moment LRO is only for IP/TCP packets whose TCP checksum
4304          * has been computed by hardware and which carry no VLAN tag in the
4305          * ethernet header.  For IPv6 we do not yet support extension headers.
4306          */
4307         if (rxr->lro_enabled &&
4308             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4309             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4310             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4311             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4312             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4313             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4314             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4315             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4316                 /*
4317                  * Send to the stack instead if:
4318                  *  - LRO is not enabled, or
4319                  *  - there are no LRO resources, or
4320                  *  - the LRO enqueue fails
4321                  */
4322                 if (rxr->lro.lro_cnt != 0)
4323                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4324                                 return;
4325         }
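             /*
             ** Drop the RX lock while handing the packet to the stack;
             ** if_input() can take a while and may call back into the
             ** driver, so the ring lock is not held across it.
             */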
4326         IXGBE_RX_UNLOCK(rxr);
4327         (*ifp->if_input)(ifp, m);
4328         IXGBE_RX_LOCK(rxr);
4329 }
4330
4331 static __inline void
4332 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4333 {
4334         struct ixgbe_rx_buf     *rbuf;
4335
4336         rbuf = &rxr->rx_buffers[i];
4337
4338         if (rbuf->fmp != NULL) {/* Partial chain ? */
4339                 rbuf->fmp->m_flags |= M_PKTHDR;
4340                 m_freem(rbuf->fmp);
4341                 rbuf->fmp = NULL;
4342         }
4343
4344         /*
4345         ** With advanced descriptors the writeback
4346         ** clobbers the buffer addresses, so it's easier
4347         ** to just free the existing mbufs and take
4348         ** the normal refresh path to get new buffers
4349         ** and mapping.
4350         */
4351         if (rbuf->buf) {
4352                 m_free(rbuf->buf);
4353                 rbuf->buf = NULL;
4354         }
4355
4356         rbuf->flags = 0;
4357  
4358         return;
4359 }
4360
4361
4362 /*********************************************************************
4363  *
4364  *  This routine executes in interrupt context. It replenishes
4365  *  the mbufs in the descriptor ring and sends data which has been
4366  *  DMA'ed into host memory to the upper layer.
4367  *
4368  *  We loop at most 'count' times (the ring's process limit), or
4369  *  until no more completed descriptors remain.
4370  *
4371  *  Return TRUE for more work, FALSE for all clean.
4372  *********************************************************************/
4373 static bool
4374 ixgbe_rxeof(struct ix_queue *que)
4375 {
4376         struct adapter          *adapter = que->adapter;
4377         struct rx_ring          *rxr = que->rxr;
4378         struct ifnet            *ifp = adapter->ifp;
4379         struct lro_ctrl         *lro = &rxr->lro;
4380         struct lro_entry        *queued;
4381         int                     i, nextp, processed = 0;
4382         u32                     staterr = 0;
4383         u16                     count = rxr->process_limit;
4384         union ixgbe_adv_rx_desc *cur;
4385         struct ixgbe_rx_buf     *rbuf, *nbuf;
4386
4387         IXGBE_RX_LOCK(rxr);
4388
4389 #ifdef DEV_NETMAP
4390         /* Same as the txeof routine: wakeup clients on intr. */
4391         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4392                 return (FALSE);
4393 #endif /* DEV_NETMAP */
4394
4395         for (i = rxr->next_to_check; count != 0;) {
4396                 struct mbuf     *sendmp, *mp;
4397                 u32             rsc, ptype;
4398                 u16             len;
4399                 u16             vtag = 0;
4400                 bool            eop;
4401  
4402                 /* Sync the ring. */
4403                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4404                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4405
4406                 cur = &rxr->rx_base[i];
4407                 staterr = le32toh(cur->wb.upper.status_error);
4408
4409                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4410                         break;
4411                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4412                         break;
4413
4414                 count--;
4415                 sendmp = NULL;
4416                 nbuf = NULL;
4417                 rsc = 0;
4418                 cur->wb.upper.status_error = 0;
4419                 rbuf = &rxr->rx_buffers[i];
4420                 mp = rbuf->buf;
4421
4422                 len = le16toh(cur->wb.upper.length);
4423                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4424                     IXGBE_RXDADV_PKTTYPE_MASK;
4425                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4426
4427                 /* Make sure bad packets are discarded */
4428                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4429                     (rxr->discard)) {
4430                         rxr->rx_discarded++;
4431                         if (eop)
4432                                 rxr->discard = FALSE;
4433                         else
4434                                 rxr->discard = TRUE;
4435                         ixgbe_rx_discard(rxr, i);
4436                         goto next_desc;
4437                 }
4438
4439                 /*
4440                 ** On the 82599, which supports a hardware
4441                 ** LRO (called HW RSC), packets need not be
4442                 ** fragmented across sequential descriptors;
4443                 ** instead the next descriptor of the frame
4444                 ** is indicated in bits of this descriptor.
4445                 ** This also means that we might process
4446                 ** more than one packet at a time, something
4447                 ** that has never been true before, which
4448                 ** required eliminating global chain pointers
4449                 ** in favor of what we are doing here.  -jfv
4450                 */
4451                 if (!eop) {
4452                         /*
4453                         ** Figure out the next descriptor
4454                         ** of this frame.
4455                         */
4456                         if (rxr->hw_rsc == TRUE) {
4457                                 rsc = ixgbe_rsc_count(cur);
4458                                 rxr->rsc_num += (rsc - 1);
4459                         }
4460                         if (rsc) { /* Get hardware index */
4461                                 nextp = ((staterr &
4462                                     IXGBE_RXDADV_NEXTP_MASK) >>
4463                                     IXGBE_RXDADV_NEXTP_SHIFT);
4464                         } else { /* Just sequential */
4465                                 nextp = i + 1;
4466                                 if (nextp == adapter->num_rx_desc)
4467                                         nextp = 0;
4468                         }
4469                         nbuf = &rxr->rx_buffers[nextp];
4470                         prefetch(nbuf);
4471                 }
4472                 /*
4473                 ** Rather than using the fmp/lmp global pointers
4474                 ** we now keep the head of a packet chain in the
4475                 ** buffer struct and pass this along from one
4476                 ** descriptor to the next, until we get EOP.
4477                 */
4478                 mp->m_len = len;
4479                 /*
4480                 ** See if there is a stored head; if so this
4481                 ** buffer is a secondary frag of that chain.
4482                 */
4483                 sendmp = rbuf->fmp;
4484                 if (sendmp != NULL) {  /* secondary frag */
4485                         rbuf->buf = rbuf->fmp = NULL;
4486                         mp->m_flags &= ~M_PKTHDR;
4487                         sendmp->m_pkthdr.len += mp->m_len;
4488                 } else {
4489                         /*
4490                          * Optimize.  This might be a small packet,
4491                          * maybe just a TCP ACK.  Do a fast copy that
4492                          * is cache aligned into a new mbuf, and
4493                          * leave the old mbuf+cluster for re-use.
4494                          */
4495                         if (eop && len <= IXGBE_RX_COPY_LEN) {
4496                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4497                                 if (sendmp != NULL) {
4498                                         sendmp->m_data +=
4499                                             IXGBE_RX_COPY_ALIGN;
4500                                         ixgbe_bcopy(mp->m_data,
4501                                             sendmp->m_data, len);
4502                                         sendmp->m_len = len;
4503                                         rxr->rx_copies++;
4504                                         rbuf->flags |= IXGBE_RX_COPY;
4505                                 }
4506                         }
4507                         if (sendmp == NULL) {
4508                                 rbuf->buf = rbuf->fmp = NULL;
4509                                 sendmp = mp;
4510                         }
4511
4512                         /* first desc of a non-ps chain */
4513                         sendmp->m_flags |= M_PKTHDR;
4514                         sendmp->m_pkthdr.len = mp->m_len;
4515                 }
4516                 ++processed;
4517
4518                 /* Pass the head pointer on */
4519                 if (eop == 0) {
4520                         nbuf->fmp = sendmp;
4521                         sendmp = NULL;
4522                         mp->m_next = nbuf->buf;
4523                 } else { /* Sending this frame */
4524                         sendmp->m_pkthdr.rcvif = ifp;
4525                         ifp->if_ipackets++;
4526                         rxr->rx_packets++;
4527                         /* capture data for AIM */
4528                         rxr->bytes += sendmp->m_pkthdr.len;
4529                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4530                         /* Process vlan info */
4531                         if ((rxr->vtag_strip) &&
4532                             (staterr & IXGBE_RXD_STAT_VP))
4533                                 vtag = le16toh(cur->wb.upper.vlan);
4534                         if (vtag) {
4535                                 sendmp->m_pkthdr.ether_vtag = vtag;
4536                                 sendmp->m_flags |= M_VLANTAG;
4537                         }
4538                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4539                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4540 #if __FreeBSD_version >= 800000
4541                         sendmp->m_pkthdr.flowid = que->msix;
4542                         sendmp->m_flags |= M_FLOWID;
4543 #endif
4544                 }
4545 next_desc:
4546                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4547                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4548
4549                 /* Advance our pointers to the next descriptor. */
4550                 if (++i == rxr->num_desc)
4551                         i = 0;
4552
4553                 /* Now send to the stack or do LRO */
4554                 if (sendmp != NULL) {
4555                         rxr->next_to_check = i;
4556                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4557                         i = rxr->next_to_check;
4558                 }
4559
4560                 /* Every 8 descriptors we go to refresh mbufs */
4561                 if (processed == 8) {
4562                         ixgbe_refresh_mbufs(rxr, i);
4563                         processed = 0;
4564                 }
4565         }
4566
4567         /* Refresh any remaining buf structs */
4568         if (ixgbe_rx_unrefreshed(rxr))
4569                 ixgbe_refresh_mbufs(rxr, i);
4570
4571         rxr->next_to_check = i;
4572
4573         /*
4574          * Flush any outstanding LRO work
4575          */
4576         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4577                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4578                 tcp_lro_flush(lro, queued);
4579         }
4580
4581         IXGBE_RX_UNLOCK(rxr);
4582
4583         /*
4584         ** Still have cleaning to do?
4585         */
4586         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
4587                 return (TRUE);
4588         else
4589                 return (FALSE);
4590 }
4591
4592
4593 /*********************************************************************
4594  *
4595  *  Verify that the hardware indicated that the checksum is valid.
4596  *  Inform the stack about the status of checksum so that stack
4597  *  doesn't spend time verifying the checksum.
4598  *
4599  *********************************************************************/
4600 static void
4601 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4602 {
4603         u16     status = (u16) staterr;
4604         u8      errors = (u8) (staterr >> 24);
4605         bool    sctp = FALSE;
4606
4607         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4608             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4609                 sctp = TRUE;
4610
4611         if (status & IXGBE_RXD_STAT_IPCS) {
4612                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4613                         /* IP Checksum Good */
4614                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4615                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4616
4617                 } else
4618                         mp->m_pkthdr.csum_flags = 0;
4619         }
4620         if (status & IXGBE_RXD_STAT_L4CS) {
4621                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4622 #if __FreeBSD_version >= 800000
4623                 if (sctp)
4624                         type = CSUM_SCTP_VALID;
4625 #endif
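                     /*
                     ** With CSUM_DATA_VALID | CSUM_PSEUDO_HDR the stack
                     ** treats csum_data as the already-computed checksum;
                     ** 0xffff marks the L4 checksum as fully verified.
                     */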
4626                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4627                         mp->m_pkthdr.csum_flags |= type;
4628                         if (!sctp)
4629                                 mp->m_pkthdr.csum_data = htons(0xffff);
4630                 } 
4631         }
4632         return;
4633 }
4634
4635
4636 /*
4637 ** This routine is run via a vlan config EVENT;
4638 ** it enables us to use the HW Filter table since
4639 ** we can get the vlan id. This just creates the
4640 ** entry in the soft version of the VFTA; init will
4641 ** repopulate the real table.
4642 */
4643 static void
4644 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4645 {
4646         struct adapter  *adapter = ifp->if_softc;
4647         u16             index, bit;
4648
4649         if (ifp->if_softc !=  arg)   /* Not our event */
4650                 return;
4651
4652         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4653                 return;
4654
4655         IXGBE_CORE_LOCK(adapter);
4656         index = (vtag >> 5) & 0x7F;
4657         bit = vtag & 0x1F;
4658         adapter->shadow_vfta[index] |= (1 << bit);
4659         ++adapter->num_vlans;
4660         ixgbe_init_locked(adapter);
4661         IXGBE_CORE_UNLOCK(adapter);
4662 }
4663
4664 /*
4665 ** This routine is run via a vlan
4666 ** unconfig EVENT; remove our entry
4667 ** from the soft vfta.
4668 */
4669 static void
4670 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4671 {
4672         struct adapter  *adapter = ifp->if_softc;
4673         u16             index, bit;
4674
4675         if (ifp->if_softc !=  arg)
4676                 return;
4677
4678         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4679                 return;
4680
4681         IXGBE_CORE_LOCK(adapter);
4682         index = (vtag >> 5) & 0x7F;
4683         bit = vtag & 0x1F;
4684         adapter->shadow_vfta[index] &= ~(1 << bit);
4685         --adapter->num_vlans;
4686         /* Re-init to load the changes */
4687         ixgbe_init_locked(adapter);
4688         IXGBE_CORE_UNLOCK(adapter);
4689 }
4690
4691 static void
4692 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4693 {
4694         struct ifnet    *ifp = adapter->ifp;
4695         struct ixgbe_hw *hw = &adapter->hw;
4696         struct rx_ring  *rxr;
4697         u32             ctrl;
4698
4699
4700         /*
4701         ** We get here through init_locked, meaning
4702         ** a soft reset; this has already cleared
4703         ** the VFTA and other state, so if no vlans
4704         ** have been registered, do nothing.
4705         */
4706         if (adapter->num_vlans == 0)
4707                 return;
4708
4709         /*
4710         ** A soft reset zeroes out the VFTA, so
4711         ** we need to repopulate it now.
4712         */
4713         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4714                 if (adapter->shadow_vfta[i] != 0)
4715                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4716                             adapter->shadow_vfta[i]);
4717
4718         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4719         /* Enable the Filter Table if enabled */
4720         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4721                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4722                 ctrl |= IXGBE_VLNCTRL_VFE;
4723         }
4724         if (hw->mac.type == ixgbe_mac_82598EB)
4725                 ctrl |= IXGBE_VLNCTRL_VME;
4726         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4727
4728         /* Setup the queues for vlans */
4729         for (int i = 0; i < adapter->num_queues; i++) {
4730                 rxr = &adapter->rx_rings[i];
4731                 /* On 82599 the VLAN enable is per/queue in RXDCTL */
4732                 if (hw->mac.type != ixgbe_mac_82598EB) {
4733                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4734                         ctrl |= IXGBE_RXDCTL_VME;
4735                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4736                 }
4737                 rxr->vtag_strip = TRUE;
4738         }
4739 }
4740
4741 static void
4742 ixgbe_enable_intr(struct adapter *adapter)
4743 {
4744         struct ixgbe_hw *hw = &adapter->hw;
4745         struct ix_queue *que = adapter->queues;
4746         u32             mask, fwsm;
4747
4748         mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4749         /* Enable Fan Failure detection */
4750         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4751                     mask |= IXGBE_EIMS_GPI_SDP1;
4752
4753         switch (adapter->hw.mac.type) {
4754                 case ixgbe_mac_82599EB:
4755                         mask |= IXGBE_EIMS_ECC;
4756                         mask |= IXGBE_EIMS_GPI_SDP0;
4757                         mask |= IXGBE_EIMS_GPI_SDP1;
4758                         mask |= IXGBE_EIMS_GPI_SDP2;
4759 #ifdef IXGBE_FDIR
4760                         mask |= IXGBE_EIMS_FLOW_DIR;
4761 #endif
4762                         break;
4763                 case ixgbe_mac_X540:
4764                         mask |= IXGBE_EIMS_ECC;
4765                         /* Detect if Thermal Sensor is enabled */
4766                         fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
4767                         if (fwsm & IXGBE_FWSM_TS_ENABLED)
4768                                 mask |= IXGBE_EIMS_TS;
4769 #ifdef IXGBE_FDIR
4770                         mask |= IXGBE_EIMS_FLOW_DIR;
4771 #endif
4772                 /* falls through */
4773                 default:
4774                         break;
4775         }
4776
4777         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4778
4779         /* With RSS we use auto clear */
4780         if (adapter->msix_mem) {
4781                 mask = IXGBE_EIMS_ENABLE_MASK;
4782                 /* Don't autoclear Link */
4783                 mask &= ~IXGBE_EIMS_OTHER;
4784                 mask &= ~IXGBE_EIMS_LSC;
4785                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4786         }
4787
4788         /*
4789         ** Now enable all queues; this is done separately to
4790         ** allow for handling the extended (beyond 32) MSIX
4791         ** vectors that can be used by 82599
4792         */
4793         for (int i = 0; i < adapter->num_queues; i++, que++)
4794                 ixgbe_enable_queue(adapter, que->msix);
4795
4796         IXGBE_WRITE_FLUSH(hw);
4797
4798         return;
4799 }
4800
4801 static void
4802 ixgbe_disable_intr(struct adapter *adapter)
4803 {
4804         if (adapter->msix_mem)
4805                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4806         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4807                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4808         } else {
4809                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4810                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4811                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4812         }
4813         IXGBE_WRITE_FLUSH(&adapter->hw);
4814         return;
4815 }
4816
4817 u16
4818 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4819 {
4820         u16 value;
4821
4822         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4823             reg, 2);
4824
4825         return (value);
4826 }
4827
4828 void
4829 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4830 {
4831         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4832             reg, value, 2);
4833
4834         return;
4835 }
4836
4837 /*
4838 ** Get the width and transaction speed of
4839 ** the slot this adapter is plugged into.
4840 */
4841 static void
4842 ixgbe_get_slot_info(struct ixgbe_hw *hw)
4843 {
4844         device_t                dev = ((struct ixgbe_osdep *)hw->back)->dev;
4845         struct ixgbe_mac_info   *mac = &hw->mac;
4846         u16                     link;
4847         u32                     offset;
4848
4849         /* For most devices simply call the shared code routine */
4850         if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
4851                 ixgbe_get_bus_info(hw);
4852                 goto display;
4853         }
4854
4855         /*
4856         ** For the Quad port adapter we need to parse back
4857         ** up the PCI tree to find the speed of the expansion
4858         ** slot into which this adapter is plugged. A bit more work.
4859         */
4860         dev = device_get_parent(device_get_parent(dev));
4861 #ifdef IXGBE_DEBUG
4862         device_printf(dev, "parent pcib = %x,%x,%x\n",
4863             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4864 #endif
4865         dev = device_get_parent(device_get_parent(dev));
4866 #ifdef IXGBE_DEBUG
4867         device_printf(dev, "slot pcib = %x,%x,%x\n",
4868             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4869 #endif
4870         /* Now get the PCI Express Capabilities offset */
4871         pci_find_cap(dev, PCIY_EXPRESS, &offset);
4872         /* ...and read the Link Status Register */
4873         link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
4874         switch (link & IXGBE_PCI_LINK_WIDTH) {
4875         case IXGBE_PCI_LINK_WIDTH_1:
4876                 hw->bus.width = ixgbe_bus_width_pcie_x1;
4877                 break;
4878         case IXGBE_PCI_LINK_WIDTH_2:
4879                 hw->bus.width = ixgbe_bus_width_pcie_x2;
4880                 break;
4881         case IXGBE_PCI_LINK_WIDTH_4:
4882                 hw->bus.width = ixgbe_bus_width_pcie_x4;
4883                 break;
4884         case IXGBE_PCI_LINK_WIDTH_8:
4885                 hw->bus.width = ixgbe_bus_width_pcie_x8;
4886                 break;
4887         default:
4888                 hw->bus.width = ixgbe_bus_width_unknown;
4889                 break;
4890         }
4891
4892         switch (link & IXGBE_PCI_LINK_SPEED) {
4893         case IXGBE_PCI_LINK_SPEED_2500:
4894                 hw->bus.speed = ixgbe_bus_speed_2500;
4895                 break;
4896         case IXGBE_PCI_LINK_SPEED_5000:
4897                 hw->bus.speed = ixgbe_bus_speed_5000;
4898                 break;
4899         case IXGBE_PCI_LINK_SPEED_8000:
4900                 hw->bus.speed = ixgbe_bus_speed_8000;
4901                 break;
4902         default:
4903                 hw->bus.speed = ixgbe_bus_speed_unknown;
4904                 break;
4905         }
4906
4907         mac->ops.set_lan_id(hw);
4908
4909 display:
4910         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
4911             ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
4912             (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
4913             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
4914             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
4915             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
4916             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
4917             ("Unknown"));
4918
4919         if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4920             ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
4921             (hw->bus.speed == ixgbe_bus_speed_2500))) {
4922                 device_printf(dev, "PCI-Express bandwidth available"
4923                     " for this card\n     is not sufficient for"
4924                     " optimal performance.\n");
4925                 device_printf(dev, "For optimal performance a x8 "
4926                     "PCIE, or x4 PCIE Gen2 slot is required.\n");
4927         }
4928         if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4929             ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
4930             (hw->bus.speed < ixgbe_bus_speed_8000))) {
4931                 device_printf(dev, "PCI-Express bandwidth available"
4932                     " for this card\n     is not sufficient for"
4933                     " optimal performance.\n");
4934                 device_printf(dev, "For optimal performance a x8 "
4935                     "PCIE Gen3 slot is required.\n");
4936         }
4937
4938         return;
4939 }
4940
4941
4942 /*
4943 ** Setup the correct IVAR register for a particular MSIX interrupt
4944 **   (yes this is all very magic and confusing :)
4945 **  - entry is the register array entry
4946 **  - vector is the MSIX vector for this queue
4947 **  - type is RX/TX/MISC
4948 */
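     /*
     ** Rough layout, as implemented below: on 82598 each 32-bit IVAR
     ** register holds four 8-bit entries, with TX entries offset by 64
     ** from RX; on 82599/X540 each IVAR register covers a queue pair,
     ** 'type' selects the byte lane, and type == -1 goes to IVAR_MISC.
     */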
4949 static void
4950 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4951 {
4952         struct ixgbe_hw *hw = &adapter->hw;
4953         u32 ivar, index;
4954
4955         vector |= IXGBE_IVAR_ALLOC_VAL;
4956
4957         switch (hw->mac.type) {
4958
4959         case ixgbe_mac_82598EB:
4960                 if (type == -1)
4961                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4962                 else
4963                         entry += (type * 64);
4964                 index = (entry >> 2) & 0x1F;
4965                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4966                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4967                 ivar |= (vector << (8 * (entry & 0x3)));
4968                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
4969                 break;
4970
4971         case ixgbe_mac_82599EB:
4972         case ixgbe_mac_X540:
4973                 if (type == -1) { /* MISC IVAR */
4974                         index = (entry & 1) * 8;
4975                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4976                         ivar &= ~(0xFF << index);
4977                         ivar |= (vector << index);
4978                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4979                 } else {        /* RX/TX IVARS */
4980                         index = (16 * (entry & 1)) + (8 * type);
4981                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4982                         ivar &= ~(0xFF << index);
4983                         ivar |= (vector << index);
4984                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4985                 }
4986
4987         default:
4988                 break;
4989         }
4990 }
4991
4992 static void
4993 ixgbe_configure_ivars(struct adapter *adapter)
4994 {
4995         struct  ix_queue *que = adapter->queues;
4996         u32 newitr;
4997
4998         if (ixgbe_max_interrupt_rate > 0)
4999                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5000         else
5001                 newitr = 0;
5002
5003         for (int i = 0; i < adapter->num_queues; i++, que++) {
5004                 /* First the RX queue entry */
5005                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5006                 /* ... and the TX */
5007                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5008                 /* Set an Initial EITR value */
5009                 IXGBE_WRITE_REG(&adapter->hw,
5010                     IXGBE_EITR(que->msix), newitr);
5011         }
5012
5013         /* For the Link interrupt */
5014         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5015 }
5016
5017 /*
5018 ** ixgbe_sfp_probe - called in the local timer to
5019 ** determine if a port had optics inserted.
5020 */  
5021 static bool ixgbe_sfp_probe(struct adapter *adapter)
5022 {
5023         struct ixgbe_hw *hw = &adapter->hw;
5024         device_t        dev = adapter->dev;
5025         bool            result = FALSE;
5026
5027         if ((hw->phy.type == ixgbe_phy_nl) &&
5028             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5029                 s32 ret = hw->phy.ops.identify_sfp(hw);
5030                 if (ret)
5031                         goto out;
5032                 ret = hw->phy.ops.reset(hw);
5033                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5034                         device_printf(dev,"Unsupported SFP+ module detected!");
5035                         printf(" Reload driver with supported module.\n");
5036                         adapter->sfp_probe = FALSE;
5037                         goto out;
5038                 } else
5039                         device_printf(dev,"SFP+ module detected!\n");
5040                 /* We now have supported optics */
5041                 adapter->sfp_probe = FALSE;
5042                 /* Set the optics type so system reports correctly */
5043                 ixgbe_setup_optics(adapter);
5044                 result = TRUE;
5045         }
5046 out:
5047         return (result);
5048 }
5049
5050 /*
5051 ** Tasklet handler for MSIX Link interrupts
5052 **  - done outside the interrupt context since it might sleep
5053 */
5054 static void
5055 ixgbe_handle_link(void *context, int pending)
5056 {
5057         struct adapter  *adapter = context;
5058
5059         ixgbe_check_link(&adapter->hw,
5060             &adapter->link_speed, &adapter->link_up, 0);
5061         ixgbe_update_link_status(adapter);
5062 }
5063
5064 /*
5065 ** Tasklet for handling SFP module interrupts
5066 */
5067 static void
5068 ixgbe_handle_mod(void *context, int pending)
5069 {
5070         struct adapter  *adapter = context;
5071         struct ixgbe_hw *hw = &adapter->hw;
5072         device_t        dev = adapter->dev;
5073         u32 err;
5074
5075         err = hw->phy.ops.identify_sfp(hw);
5076         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5077                 device_printf(dev,
5078                     "Unsupported SFP+ module type was detected.\n");
5079                 return;
5080         }
5081         err = hw->mac.ops.setup_sfp(hw);
5082         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5083                 device_printf(dev,
5084                     "Setup failure - unsupported SFP+ module type.\n");
5085                 return;
5086         }
5087         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5088         return;
5089 }
5090
5091
5092 /*
5093 ** Tasklet for handling MSF (multispeed fiber) interrupts
5094 */
5095 static void
5096 ixgbe_handle_msf(void *context, int pending)
5097 {
5098         struct adapter  *adapter = context;
5099         struct ixgbe_hw *hw = &adapter->hw;
5100         u32 autoneg;
5101         bool negotiate;
5102
5103         autoneg = hw->phy.autoneg_advertised;
5104         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5105                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5106         if (hw->mac.ops.setup_link)
5107                 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5108         return;
5109 }
5110
5111 #ifdef IXGBE_FDIR
5112 /*
5113 ** Tasklet for reinitializing the Flow Director filter table
5114 */
5115 static void
5116 ixgbe_reinit_fdir(void *context, int pending)
5117 {
5118         struct adapter  *adapter = context;
5119         struct ifnet   *ifp = adapter->ifp;
5120
5121         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5122                 return;
5123         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5124         adapter->fdir_reinit = 0;
5125         /* re-enable flow director interrupts */
5126         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5127         /* Restart the interface */
5128         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5129         return;
5130 }
5131 #endif
5132
5133 /**********************************************************************
5134  *
5135  *  Update the board statistics counters.
5136  *
5137  **********************************************************************/
5138 static void
5139 ixgbe_update_stats_counters(struct adapter *adapter)
5140 {
5141         struct ifnet   *ifp = adapter->ifp;
5142         struct ixgbe_hw *hw = &adapter->hw;
5143         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5144         u64  total_missed_rx = 0;
5145
5146         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5147         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5148         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5149         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5150
5151         /*
5152         ** Note: these are for the 8 possible traffic classes,
5153         **       which in the current implementation are unused,
5154         **       therefore only index 0 should read real data.
5155         */
5156         for (int i = 0; i < 8; i++) {
5157                 u32 mp;
5158                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5159                 /* missed_rx tallies misses for the gprc workaround */
5160                 missed_rx += mp;
5161                 /* global total per queue */
5162                 adapter->stats.mpc[i] += mp;
5163                 /* Running comprehensive total for stats display */
5164                 total_missed_rx += adapter->stats.mpc[i];
5165                 if (hw->mac.type == ixgbe_mac_82598EB) {
5166                         adapter->stats.rnbc[i] +=
5167                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5168                         adapter->stats.qbtc[i] +=
5169                             IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5170                         adapter->stats.qbrc[i] +=
5171                             IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5172                         adapter->stats.pxonrxc[i] +=
5173                             IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5174                 } else
5175                         adapter->stats.pxonrxc[i] +=
5176                             IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5177                 adapter->stats.pxontxc[i] +=
5178                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5179                 adapter->stats.pxofftxc[i] +=
5180                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5181                 adapter->stats.pxoffrxc[i] +=
5182                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5183                 adapter->stats.pxon2offc[i] +=
5184                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5185         }
5186         for (int i = 0; i < 16; i++) {
5187                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5188                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5189                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5190         }
5191         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5192         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5193         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5194
5195         /* Hardware workaround, gprc counts missed packets */
5196         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5197         adapter->stats.gprc -= missed_rx;
5198
5199         if (hw->mac.type != ixgbe_mac_82598EB) {
5200                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5201                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5202                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5203                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5204                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5205                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5206                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5207                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5208         } else {
5209                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5210                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5211                 /* 82598 only has a counter in the high register */
5212                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5213                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5214                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5215         }
5216
5217         /*
5218          * Workaround: mprc hardware is incorrectly counting
5219          * broadcasts, so for now we subtract those.
5220          */
5221         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5222         adapter->stats.bprc += bprc;
5223         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5224         if (hw->mac.type == ixgbe_mac_82598EB)
5225                 adapter->stats.mprc -= bprc;
5226
5227         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5228         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5229         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5230         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5231         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5232         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5233
5234         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5235         adapter->stats.lxontxc += lxon;
5236         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5237         adapter->stats.lxofftxc += lxoff;
5238         total = lxon + lxoff;
5239
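             /*
             ** The XON/XOFF totals are subtracted below because the
             ** hardware appears to count transmitted pause frames in the
             ** good-packet counters; each pause frame is taken as a
             ** minimum-size Ethernet frame, hence the ETHER_MIN_LEN
             ** adjustment to gotc.
             */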
5240         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5241         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5242         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5243         adapter->stats.gptc -= total;
5244         adapter->stats.mptc -= total;
5245         adapter->stats.ptc64 -= total;
5246         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5247
5248         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5249         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5250         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5251         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5252         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5253         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5254         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5255         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5256         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5257         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5258         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5259         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5260         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5261         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5262         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5263         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5264         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5265         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5266         /* Only read FCOE on 82599 */
5267         if (hw->mac.type != ixgbe_mac_82598EB) {
5268                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5269                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5270                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5271                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5272                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5273         }
5274
5275         /* Fill out the OS statistics structure */
5276         ifp->if_ipackets = adapter->stats.gprc;
5277         ifp->if_opackets = adapter->stats.gptc;
5278         ifp->if_ibytes = adapter->stats.gorc;
5279         ifp->if_obytes = adapter->stats.gotc;
5280         ifp->if_imcasts = adapter->stats.mprc;
5281         ifp->if_omcasts = adapter->stats.mptc;
5282         ifp->if_collisions = 0;
5283
5284         /* Rx Errors */
5285         ifp->if_iqdrops = total_missed_rx;
5286         ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
5287 }
5288
5289 /** ixgbe_sysctl_tdh_handler - Handler function
5290  *  Retrieves the TDH value from the hardware
5291  */
5292 static int 
5293 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5294 {
5295         int error;
5296
5297         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5298         if (!txr) return 0;
5299
5300         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5301         error = sysctl_handle_int(oidp, &val, 0, req);
5302         if (error || !req->newptr)
5303                 return error;
5304         return 0;
5305 }
5306
5307 /** ixgbe_sysctl_tdt_handler - Handler function
5308  *  Retrieves the TDT value from the hardware
5309  */
5310 static int 
5311 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5312 {
5313         int error;
5314
5315         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5316         if (!txr) return 0;
5317
5318         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5319         error = sysctl_handle_int(oidp, &val, 0, req);
5320         if (error || !req->newptr)
5321                 return error;
5322         return 0;
5323 }
5324
5325 /** ixgbe_sysctl_rdh_handler - Handler function
5326  *  Retrieves the RDH value from the hardware
5327  */
5328 static int 
5329 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5330 {
5331         int error;
5332
5333         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5334         if (!rxr) return 0;
5335
5336         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5337         error = sysctl_handle_int(oidp, &val, 0, req);
5338         if (error || !req->newptr)
5339                 return error;
5340         return 0;
5341 }
5342
5343 /** ixgbe_sysctl_rdt_handler - Handler function
5344  *  Retrieves the RDT value from the hardware
5345  */
5346 static int 
5347 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5348 {
5349         int error;
5350
5351         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5352         if (!rxr) return 0;
5353
5354         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5355         error = sysctl_handle_int(oidp, &val, 0, req);
5356         if (error || !req->newptr)
5357                 return error;
5358         return 0;
5359 }
5360
5361 static int
5362 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5363 {
5364         int error;
5365         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5366         unsigned int reg, usec, rate;
5367
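             /*
             ** The EITR interval field lives in bits [11:3] (mask 0x0FF8);
             ** the conversions below mirror the 4000000/rate encoding used
             ** when the initial EITR value is set in ixgbe_configure_ivars().
             */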
5368         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5369         usec = ((reg & 0x0FF8) >> 3);
5370         if (usec > 0)
5371                 rate = 500000 / usec;
5372         else
5373                 rate = 0;
5374         error = sysctl_handle_int(oidp, &rate, 0, req);
5375         if (error || !req->newptr)
5376                 return error;
5377         reg &= ~0xfff; /* default, no limitation */
5378         ixgbe_max_interrupt_rate = 0;
5379         if (rate > 0 && rate < 500000) {
5380                 if (rate < 1000)
5381                         rate = 1000;
5382                 ixgbe_max_interrupt_rate = rate;
5383                 reg |= ((4000000/rate) & 0xff8 );
5384         }
5385         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5386         return 0;
5387 }
5388
5389 /*
5390  * Add sysctl variables, one per statistic, to the system.
5391  */
5392 static void
5393 ixgbe_add_hw_stats(struct adapter *adapter)
5394 {
5395
5396         device_t dev = adapter->dev;
5397
5398         struct tx_ring *txr = adapter->tx_rings;
5399         struct rx_ring *rxr = adapter->rx_rings;
5400
5401         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5402         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5403         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5404         struct ixgbe_hw_stats *stats = &adapter->stats;
5405
5406         struct sysctl_oid *stat_node, *queue_node;
5407         struct sysctl_oid_list *stat_list, *queue_list;
5408
5409 #define QUEUE_NAME_LEN 32
5410         char namebuf[QUEUE_NAME_LEN];
5411
5412         /* Driver Statistics */
5413         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5414                         CTLFLAG_RD, &adapter->dropped_pkts,
5415                         "Driver dropped packets");
5416         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5417                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5418                         "m_defrag() failed");
5419         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5420                         CTLFLAG_RD, &adapter->watchdog_events,
5421                         "Watchdog timeouts");
5422         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5423                         CTLFLAG_RD, &adapter->link_irq,
5424                         "Link MSIX IRQ Handled");
5425
5426         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5427                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5428                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5429                                             CTLFLAG_RD, NULL, "Queue Name");
5430                 queue_list = SYSCTL_CHILDREN(queue_node);
5431
5432                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5433                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5434                                 sizeof(&adapter->queues[i]),
5435                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5436                                 "Interrupt Rate");
5437                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5438                                 CTLFLAG_RD, &(adapter->queues[i].irqs),
5439                                 "irqs on this queue");
5440                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5441                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5442                                 ixgbe_sysctl_tdh_handler, "IU",
5443                                 "Transmit Descriptor Head");
5444                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5445                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5446                                 ixgbe_sysctl_tdt_handler, "IU",
5447                                 "Transmit Descriptor Tail");
5448                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
5449                                 CTLFLAG_RD, &txr->tso_tx,
5450                                 "TSO");
5451                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
5452                                 CTLFLAG_RD, &txr->no_tx_dma_setup,
5453                                 "Driver tx dma failure in xmit");
5454                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5455                                 CTLFLAG_RD, &txr->no_desc_avail,
5456                                 "Queue No Descriptor Available");
5457                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5458                                 CTLFLAG_RD, &txr->total_packets,
5459                                 "Queue Packets Transmitted");
5460         }
5461
5462         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5463                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5464                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5465                                             CTLFLAG_RD, NULL, "Queue Name");
5466                 queue_list = SYSCTL_CHILDREN(queue_node);
5467
5468                 struct lro_ctrl *lro = &rxr->lro;
5469
5470                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5471                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5472                                             CTLFLAG_RD, NULL, "Queue Name");
5473                 queue_list = SYSCTL_CHILDREN(queue_node);
5474
5475                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5476                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5477                                 ixgbe_sysctl_rdh_handler, "IU",
5478                                 "Receive Descriptor Head");
5479                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5480                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5481                                 ixgbe_sysctl_rdt_handler, "IU",
5482                                 "Receive Descriptor Tail");
5483                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5484                                 CTLFLAG_RD, &rxr->rx_packets,
5485                                 "Queue Packets Received");
5486                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5487                                 CTLFLAG_RD, &rxr->rx_bytes,
5488                                 "Queue Bytes Received");
5489                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
5490                                 CTLFLAG_RD, &rxr->rx_copies,
5491                                 "Copied RX Frames");
5492                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5493                                 CTLFLAG_RD, &lro->lro_queued, 0,
5494                                 "LRO Queued");
5495                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5496                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5497                                 "LRO Flushed");
5498         }
5499
5500         /* MAC stats get their own sub node */
5501
5502         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5503                                     CTLFLAG_RD, NULL, "MAC Statistics");
5504         stat_list = SYSCTL_CHILDREN(stat_node);
5505
5506         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5507                         CTLFLAG_RD, &stats->crcerrs,
5508                         "CRC Errors");
5509         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5510                         CTLFLAG_RD, &stats->illerrc,
5511                         "Illegal Byte Errors");
5512         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5513                         CTLFLAG_RD, &stats->errbc,
5514                         "Byte Errors");
5515         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5516                         CTLFLAG_RD, &stats->mspdc,
5517                         "MAC Short Packets Discarded");
5518         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5519                         CTLFLAG_RD, &stats->mlfc,
5520                         "MAC Local Faults");
5521         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5522                         CTLFLAG_RD, &stats->mrfc,
5523                         "MAC Remote Faults");
5524         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5525                         CTLFLAG_RD, &stats->rlec,
5526                         "Receive Length Errors");
5527
5528         /* Flow Control stats */
5529         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5530                         CTLFLAG_RD, &stats->lxontxc,
5531                         "Link XON Transmitted");
5532         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5533                         CTLFLAG_RD, &stats->lxonrxc,
5534                         "Link XON Received");
5535         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5536                         CTLFLAG_RD, &stats->lxofftxc,
5537                         "Link XOFF Transmitted");
5538         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5539                         CTLFLAG_RD, &stats->lxoffrxc,
5540                         "Link XOFF Received");
5541
5542         /* Packet Reception Stats */
5543         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5544                         CTLFLAG_RD, &stats->tor, 
5545                         "Total Octets Received"); 
5546         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5547                         CTLFLAG_RD, &stats->gorc, 
5548                         "Good Octets Received"); 
5549         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5550                         CTLFLAG_RD, &stats->tpr,
5551                         "Total Packets Received");
5552         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5553                         CTLFLAG_RD, &stats->gprc,
5554                         "Good Packets Received");
5555         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5556                         CTLFLAG_RD, &stats->mprc,
5557                         "Multicast Packets Received");
5558         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5559                         CTLFLAG_RD, &stats->bprc,
5560                         "Broadcast Packets Received");
5561         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5562                         CTLFLAG_RD, &stats->prc64,
5563                         "64 byte frames received");
5564         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5565                         CTLFLAG_RD, &stats->prc127,
5566                         "65-127 byte frames received");
5567         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5568                         CTLFLAG_RD, &stats->prc255,
5569                         "128-255 byte frames received");
5570         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5571                         CTLFLAG_RD, &stats->prc511,
5572                         "256-511 byte frames received");
5573         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5574                         CTLFLAG_RD, &stats->prc1023,
5575                         "512-1023 byte frames received");
5576         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5577                         CTLFLAG_RD, &stats->prc1522,
5578                         "1024-1522 byte frames received");
5579         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5580                         CTLFLAG_RD, &stats->ruc,
5581                         "Receive Undersized");
5582         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5583                         CTLFLAG_RD, &stats->rfc,
5584                         "Fragmented Packets Received");
5585         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5586                         CTLFLAG_RD, &stats->roc,
5587                         "Oversized Packets Received");
5588         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5589                         CTLFLAG_RD, &stats->rjc,
5590                         "Received Jabber");
5591         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5592                         CTLFLAG_RD, &stats->mngprc,
5593                         "Management Packets Received");
5594         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5595                         CTLFLAG_RD, &stats->mngpdc,
5596                         "Management Packets Dropped");
5597         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5598                         CTLFLAG_RD, &stats->xec,
5599                         "Checksum Errors");
5600
5601         /* Packet Transmission Stats */
5602         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5603                         CTLFLAG_RD, &stats->gotc, 
5604                         "Good Octets Transmitted"); 
5605         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5606                         CTLFLAG_RD, &stats->tpt,
5607                         "Total Packets Transmitted");
5608         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5609                         CTLFLAG_RD, &stats->gptc,
5610                         "Good Packets Transmitted");
5611         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5612                         CTLFLAG_RD, &stats->bptc,
5613                         "Broadcast Packets Transmitted");
5614         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5615                         CTLFLAG_RD, &stats->mptc,
5616                         "Multicast Packets Transmitted");
5617         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5618                         CTLFLAG_RD, &stats->mngptc,
5619                         "Management Packets Transmitted");
5620         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5621                         CTLFLAG_RD, &stats->ptc64,
5622                         "64 byte frames transmitted");
5623         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5624                         CTLFLAG_RD, &stats->ptc127,
5625                         "65-127 byte frames transmitted");
5626         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5627                         CTLFLAG_RD, &stats->ptc255,
5628                         "128-255 byte frames transmitted");
5629         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5630                         CTLFLAG_RD, &stats->ptc511,
5631                         "256-511 byte frames transmitted");
5632         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5633                         CTLFLAG_RD, &stats->ptc1023,
5634                         "512-1023 byte frames transmitted");
5635         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5636                         CTLFLAG_RD, &stats->ptc1522,
5637                         "1024-1522 byte frames transmitted");
5638 }
5639
5640 /*
5641 ** Set flow control using sysctl:
5642 ** Flow control values:
5643 **      0 - off
5644 **      1 - rx pause
5645 **      2 - tx pause
5646 **      3 - full
5647 */
5648 static int
5649 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5650 {
5651         int error, last;
5652         struct adapter *adapter = (struct adapter *) arg1;
5653
5654         last = adapter->fc;
5655         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5656         if ((error) || (req->newptr == NULL))
5657                 return (error);
5658
5659         /* Don't bother if it's not changed */
5660         if (adapter->fc == last)
5661                 return (0);
5662
5663         switch (adapter->fc) {
5664                 case ixgbe_fc_rx_pause:
5665                 case ixgbe_fc_tx_pause:
5666                 case ixgbe_fc_full:
5667                         adapter->hw.fc.requested_mode = adapter->fc;
5668                         if (adapter->num_queues > 1)
5669                                 ixgbe_disable_rx_drop(adapter);
5670                         break;
5671                 case ixgbe_fc_none:
5672                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5673                         if (adapter->num_queues > 1)
5674                                 ixgbe_enable_rx_drop(adapter);
5675                         break;
5676                 default:
5677                         adapter->fc = last;
5678                         return (EINVAL);
5679         }
5680         /* Don't autoneg if forcing a value */
5681         adapter->hw.fc.disable_fc_autoneg = TRUE;
5682         ixgbe_fc_enable(&adapter->hw);
5683         return (error);
5684 }
5685
5686 /*
5687 ** Control link advertise speed:
5688 **      1 - advertise only 1G
5689 **      2 - advertise 100Mb
5690 **      3 - advertise normal (1Gb and 10Gb)
5691 */
5692 static int
5693 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5694 {
5695         int                     error = 0;
5696         struct adapter          *adapter;
5697         device_t                dev;
5698         struct ixgbe_hw         *hw;
5699         ixgbe_link_speed        speed, last;
5700
5701         adapter = (struct adapter *) arg1;
5702         dev = adapter->dev;
5703         hw = &adapter->hw;
5704         last = adapter->advertise;
5705
5706         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5707         if ((error) || (req->newptr == NULL))
5708                 return (error);
5709
5710         if (adapter->advertise == last) /* no change */
5711                 return (0);
5712
5713         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5714             (hw->phy.multispeed_fiber)))
5715                 return (EINVAL);
5716
5717         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5718                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5719                 return (EINVAL);
5720         }
5721
5722         if (adapter->advertise == 1)
5723                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5724         else if (adapter->advertise == 2)
5725                 speed = IXGBE_LINK_SPEED_100_FULL;
5726         else if (adapter->advertise == 3)
5727                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5728                         IXGBE_LINK_SPEED_10GB_FULL;
5729         else {  /* bogus value */
5730                 adapter->advertise = last;
5731                 return (EINVAL);
5732         }
5733
5734         hw->mac.autotry_restart = TRUE;
5735         hw->mac.ops.setup_link(hw, speed, TRUE);
5736
5737         return (error);
5738 }
5739
5740 /*
5741 ** Thermal Shutdown Trigger
5742 **   - cause a Thermal Overtemp IRQ
5743 **   - this now requires firmware enabling
5744 */
5745 static int
5746 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5747 {
5748         int             error, fire = 0;
5749         struct adapter  *adapter = (struct adapter *) arg1;
5750         struct ixgbe_hw *hw = &adapter->hw;
5751
5753         if (hw->mac.type != ixgbe_mac_X540)
5754                 return (0);
5755
5756         error = sysctl_handle_int(oidp, &fire, 0, req);
5757         if ((error) || (req->newptr == NULL))
5758                 return (error);
5759
5760         if (fire) {
5761                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5762                 reg |= IXGBE_EICR_TS;
5763                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5764         }
5765
5766         return (0);
5767 }
5768
5769 /*
5770 ** Enable the hardware to drop packets when the buffer is
5771 ** full. This is useful when multiple queues are in use, so
5772 ** that no single full queue stalls the entire RX engine. We
5773 ** only enable this when multiqueue is enabled AND Flow
5774 ** Control is disabled.
5775 */
5776 static void
5777 ixgbe_enable_rx_drop(struct adapter *adapter)
5778 {
5779         struct ixgbe_hw *hw = &adapter->hw;
5780
5781         for (int i = 0; i < adapter->num_queues; i++) {
5782                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5783                 srrctl |= IXGBE_SRRCTL_DROP_EN;
5784                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5785         }
5786 }
5787
5788 static void
5789 ixgbe_disable_rx_drop(struct adapter *adapter)
5790 {
5791         struct ixgbe_hw *hw = &adapter->hw;
5792
5793         for (int i = 0; i < adapter->num_queues; i++) {
5794                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5795                 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
5796                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5797         }
5798 }