/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "ixgbe.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char ixgbe_driver_version[] = "2.5.15";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static char    *ixgbe_strings[] = {
        "Intel(R) PRO/10GbE PCI-Express Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t);
static int      ixgbe_attach(device_t);
static int      ixgbe_detach(device_t);
static int      ixgbe_shutdown(device_t);
#ifdef IXGBE_LEGACY_TX
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#else /* ! IXGBE_LEGACY_TX */
static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int      ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     ixgbe_qflush(struct ifnet *);
static void     ixgbe_deferred_mq_start(void *, int);
#endif /* IXGBE_LEGACY_TX */
static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
static void     ixgbe_init(void *);
static void     ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *);
static void     ixgbe_get_slot_info(struct ixgbe_hw *);
static int      ixgbe_allocate_msix(struct adapter *);
static int      ixgbe_allocate_legacy(struct adapter *);
static int      ixgbe_allocate_queues(struct adapter *);
static int      ixgbe_setup_msix(struct adapter *);
static void     ixgbe_free_pci_resources(struct adapter *);
static void     ixgbe_local_timer(void *);
static int      ixgbe_setup_interface(device_t, struct adapter *);
static void     ixgbe_config_link(struct adapter *);

static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int      ixgbe_setup_transmit_structures(struct adapter *);
static void     ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int      ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
static void     ixgbe_setup_hw_rsc(struct rx_ring *);

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void     ixgbe_txeof(struct tx_ring *);
static bool     ixgbe_rxeof(struct ix_queue *);
static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
                    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static int      ixgbe_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      ixgbe_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void     ixgbe_configure_ivars(struct adapter *);
static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void     ixgbe_setup_vlan_hw_support(struct adapter *);
static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                    struct mbuf *, u32);

static void     ixgbe_enable_rx_drop(struct adapter *);
static void     ixgbe_disable_rx_drop(struct adapter *);

/* Support for pluggable optic modules */
static bool     ixgbe_sfp_probe(struct adapter *);
static void     ixgbe_setup_optics(struct adapter *);

/* Legacy (single-vector) interrupt handler */
static void     ixgbe_legacy_irq(void *);

/* The MSI/X Interrupt handlers */
static void     ixgbe_msix_que(void *);
static void     ixgbe_msix_link(void *);

/* Deferred interrupt tasklets */
static void     ixgbe_handle_que(void *, int);
static void     ixgbe_handle_link(void *, int);
static void     ixgbe_handle_msf(void *, int);
static void     ixgbe_handle_mod(void *, int);

#ifdef IXGBE_FDIR
static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
static void     ixgbe_reinit_fdir(void *, int);
#endif

/* Missing shared code prototype */
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ixgbe_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, ixgbe_probe),
        DEVMETHOD(device_attach, ixgbe_attach),
        DEVMETHOD(device_detach, ixgbe_detach),
        DEVMETHOD(device_shutdown, ixgbe_shutdown),
        DEVMETHOD_END
};

static driver_t ixgbe_driver = {
        "ix", ixgbe_methods, sizeof(struct adapter),
};

devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
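
/*
** Usage sketch (assuming the module builds as "ixgbe", per
** sys/modules/ixgbe): it can be loaded at runtime with
** "kldload ixgbe", after which ports attach as ix0, ix1, ...
** because the driver_t above registers under the "ix" name.
*/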

/*
** TUNEABLE PARAMETERS:
*/

/*
** AIM: Adaptive Interrupt Moderation,
** which varies the interrupt rate over
** time based on the traffic seen on
** that interrupt vector.
*/
static int ixgbe_enable_aim = TRUE;
TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);

static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);

/*
** Smart speed setting, default to on.
** This currently works only as a compile-time
** option since it takes effect during attach;
** set it to 'ixgbe_smart_speed_off' to disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);

/*
 * Number of Queues: can be set to 0,
 * in which case it autoconfigures based
 * on the number of CPUs, up to a max of 8.
 * It can also be overridden manually here.
 */
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);

/*
** Number of TX descriptors per ring;
** set higher than RX as transmit seems
** to perform better with the larger ring.
*/
static int ixgbe_txd = PERFORM_TXD;
TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);

/*
** Enabling this allows the use of
** unsupported SFP+ modules; note that
** if you do so, you are on your own :)
*/
static int allow_unsupported_sfp = FALSE;
TUNABLE_INT("hw.ixgbe.unsupported_sfp", &allow_unsupported_sfp);
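
/*
** Usage sketch (hypothetical values): the TUNABLE_INT knobs above
** are read at boot, so they would normally be set from
** /boot/loader.conf, e.g.:
**
**   hw.ixgbe.enable_aim=0
**   hw.ixgbe.num_queues=4
**   hw.ixgbe.rxd=2048
*/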

/*
** HW RSC control:
**  This feature works only with IPv4,
**  and only on 82599 and later hardware.
**  It also breaks IP forwarding, and unlike
**  LRO that cannot be controlled by the
**  stack. For these reasons it is left off
**  by default with no tunable interface;
**  enabling it requires recompiling the
**  driver.
*/
static bool ixgbe_rsc_enable = FALSE;

/* Keep a running tab of ports for sanity checking */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the sample
** rate for TX packets entering the filter
** pool; the default of 20 means every
** 20th packet is probed.
**
** This feature can be disabled by
** setting it to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals' part
** of the packet buffer for its filter
** pool; this variable controls how much
** it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif

#ifdef DEV_NETMAP
/*
 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
 * be a reference on how to implement netmap support in a driver.
 * Additional comments are in ixgbe_netmap.h .
 *
 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
 * that extend the standard driver.
 */
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines if the driver should be loaded on
 *  an adapter based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
ixgbe_probe(device_t dev)
{
        ixgbe_vendor_info_t *ent;

        u16     pci_vendor_id = 0;
        u16     pci_device_id = 0;
        u16     pci_subvendor_id = 0;
        u16     pci_subdevice_id = 0;
        char    adapter_name[256];

        INIT_DEBUGOUT("ixgbe_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

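        /*
        ** Scan the table; a subvendor or subdevice ID of 0 in an
        ** entry acts as a wildcard and matches any value.
        */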
        ent = ixgbe_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                     (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                     (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                ixgbe_strings[ent->index],
                                ixgbe_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        ++ixgbe_total_ports;
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_attach(device_t dev)
{
        struct adapter *adapter;
        struct ixgbe_hw *hw;
        int             error = 0;
        u16             csum;
        u32             ctrl_ext;

        INIT_DEBUGOUT("ixgbe_attach: begin");

        /* Allocate, clear, and link in our adapter structure */
        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;

        /* Core Lock Init */
        IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL APIs */

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "enable_aim", CTLTYPE_INT | CTLFLAG_RW,
                        &ixgbe_enable_aim, 1, "Interrupt Moderation");

        /*
        ** Allow a kind of speed control by forcing the autoneg
        ** advertised speed list to only a certain value; this
        ** supports 1G on 82599 devices and 100Mb on X540.
        */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_advertise, "I", "Link Speed");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
                        0, ixgbe_set_thermal_test, "I", "Thermal Test");

        /* Set up the timer callout */
        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware revision */
        ixgbe_identify_hardware(adapter);

        /* Do base PCI setup - map BAR0 */
        if (ixgbe_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_out;
        }

        /* Do descriptor calc and sanity checks */
        if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
                device_printf(dev, "TXD config issue, using default!\n");
                adapter->num_tx_desc = DEFAULT_TXD;
        } else
                adapter->num_tx_desc = ixgbe_txd;

        /*
        ** With many RX rings it is easy to exceed the
        ** system mbuf allocation. Tuning nmbclusters
        ** can alleviate this.
        */
        if (nmbclusters > 0) {
                int s;
                s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
                if (s > nmbclusters) {
                        device_printf(dev, "RX Descriptors exceed "
                            "system mbuf max, using default instead!\n");
                        ixgbe_rxd = DEFAULT_RXD;
                }
        }

        if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
                device_printf(dev, "RXD config issue, using default!\n");
                adapter->num_rx_desc = DEFAULT_RXD;
        } else
                adapter->num_rx_desc = ixgbe_rxd;

        /* Allocate our TX/RX Queues */
        if (ixgbe_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_out;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Initialize the shared code */
        hw->allow_unsupported_sfp = allow_unsupported_sfp;
        error = ixgbe_init_shared_code(hw);
        if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
                /*
                ** No optics in this port; set up
                ** so the timer routine will probe
                ** for later insertion.
                */
                adapter->sfp_probe = TRUE;
                error = 0;
        } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                device_printf(dev, "Unsupported SFP+ module detected!\n");
                error = EIO;
                goto err_late;
        } else if (error) {
                device_printf(dev, "Unable to initialize the shared code\n");
                error = EIO;
                goto err_late;
        }

        /* Make sure we have a good EEPROM before we read from it */
        if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
                device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
                error = EIO;
                goto err_late;
        }

        error = ixgbe_init_hw(hw);
        switch (error) {
        case IXGBE_ERR_EEPROM_VERSION:
                device_printf(dev, "This device is a pre-production adapter/"
                    "LOM.  Please be aware there may be issues associated "
                    "with your hardware.\nIf you are experiencing problems "
                    "please contact your Intel or hardware representative "
                    "who provided you with this hardware.\n");
                break;
        case IXGBE_ERR_SFP_NOT_SUPPORTED:
                device_printf(dev, "Unsupported SFP+ Module\n");
                error = EIO;
                goto err_late;
        case IXGBE_ERR_SFP_NOT_PRESENT:
                device_printf(dev, "No SFP+ Module found\n");
                /* falls thru */
        default:
                break;
        }

        /* Detect and set physical type */
        ixgbe_setup_optics(adapter);

        if ((adapter->msix > 1) && (ixgbe_enable_msix))
                error = ixgbe_allocate_msix(adapter);
        else
                error = ixgbe_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        if (ixgbe_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Initialize statistics */
        ixgbe_update_stats_counters(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /*
        ** Check PCIE slot type/speed/width
        */
        ixgbe_get_slot_info(hw);

        /* Set an initial default flow control value */
        adapter->fc = ixgbe_fc_full;

        /* let hardware know driver is loaded */
        ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
        ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

        ixgbe_add_hw_stats(adapter);

#ifdef DEV_NETMAP
        ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("ixgbe_attach: end");
        return (0);
err_late:
        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
err_out:
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        ixgbe_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_detach(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ix_queue *que = adapter->queues;
        struct tx_ring *txr = adapter->tx_rings;
        u32     ctrl_ext;

        INIT_DEBUGOUT("ixgbe_detach: begin");

        /* Make sure VLANs are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);

        for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
                if (que->tq) {
#ifndef IXGBE_LEGACY_TX
                        taskqueue_drain(que->tq, &txr->txq_task);
#endif
                        taskqueue_drain(que->tq, &que->que_task);
                        taskqueue_free(que->tq);
                }
        }

        /* Drain the Link queue */
        if (adapter->tq) {
                taskqueue_drain(adapter->tq, &adapter->link_task);
                taskqueue_drain(adapter->tq, &adapter->mod_task);
                taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
                taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
                taskqueue_free(adapter->tq);
        }

        /* let hardware know driver is unloading */
        ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
        ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);
#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        ixgbe_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(adapter->ifp);

        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
        free(adapter->mta, M_DEVBUF);

        IXGBE_CORE_LOCK_DESTROY(adapter);
        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
ixgbe_shutdown(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return (0);
}

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct mbuf    *m_head;
        struct adapter *adapter = txr->adapter;

        IXGBE_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                return;
        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
                        break;

                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;

                if (ixgbe_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status = IXGBE_QUEUE_WORKING;
        }
        return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IXGBE_TX_LOCK(txr);
                ixgbe_start_locked(txr, ifp);
                IXGBE_TX_UNLOCK(txr);
        }
        return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ix_queue *que;
        struct tx_ring  *txr;
        int             i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IXGBE_TX_TRYLOCK(txr)) {
                err = ixgbe_mq_start_locked(ifp, txr);
                IXGBE_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             enqueued = 0, err = 0;

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

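        /*
        ** A note on the loop below: on FreeBSD >= 901504 the ring is
        ** drained with drbr_peek(); a successful ixgbe_xmit() is
        ** followed by drbr_advance() to consume the slot, and a failed
        ** one by drbr_putback() so the (possibly reallocated) mbuf is
        ** not lost. Older releases fall back to dequeue/re-enqueue.
        */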
        /* Process the queue */
#if __FreeBSD_version < 901504
        next = drbr_dequeue(ifp, txr->br);
        while (next != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
#else
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                drbr_advance(ifp, txr->br);
                        } else {
                                drbr_putback(ifp, txr->br, next);
                        }
#endif
                        break;
                }
#if __FreeBSD_version >= 901504
                drbr_advance(ifp, txr->br);
#endif
                enqueued++;
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
#if __FreeBSD_version < 901504
                next = drbr_dequeue(ifp, txr->br);
#endif
        }

        if (enqueued > 0) {
                /* Set watchdog on */
                txr->queue_status = IXGBE_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
                ixgbe_txeof(txr);

        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
ixgbe_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IXGBE_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                ixgbe_mq_start_locked(ifp, txr);
        IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IXGBE_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IXGBE_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ixgbe_hw *hw = &adapter->hw;
        struct ifreq    *ifr = (struct ifreq *) data;
#if defined(INET) || defined(INET6)
        struct ifaddr *ifa = (struct ifaddr *)data;
        bool            avoid_reset = FALSE;
#endif
        int             error = 0;

        switch (command) {

        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
#if defined(INET) || defined(INET6)
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                ixgbe_init(adapter);
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
                        error = ether_ioctl(ifp, command, data);
#endif
                break;
        case SIOCSIFMTU:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
                if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
                        error = EINVAL;
                } else {
                        IXGBE_CORE_LOCK(adapter);
                        ifp->if_mtu = ifr->ifr_mtu;
                        adapter->max_frame_size =
                                ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
                IXGBE_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        ixgbe_set_promisc(adapter);
                                }
                        } else
                                ixgbe_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                ixgbe_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IXGBE_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_disable_intr(adapter);
                        ixgbe_set_multi(adapter);
                        ixgbe_enable_intr(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
        {
                int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
                if (mask & IFCAP_HWCSUM)
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                if (mask & IFCAP_TSO4)
                        ifp->if_capenable ^= IFCAP_TSO4;
                if (mask & IFCAP_TSO6)
                        ifp->if_capenable ^= IFCAP_TSO6;
                if (mask & IFCAP_LRO)
                        ifp->if_capenable ^= IFCAP_LRO;
                if (mask & IFCAP_VLAN_HWTAGGING)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                if (mask & IFCAP_VLAN_HWFILTER)
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                if (mask & IFCAP_VLAN_HWTSO)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                VLAN_CAPABILITIES(ifp);
                break;
        }
        case SIOCGI2C:
        {
                struct ixgbe_i2c_req    i2c;
                IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
                error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
                if (error)
                        break;
                if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
                        error = EINVAL;
                        break;
                }
                hw->phy.ops.read_i2c_byte(hw, i2c.offset,
                    i2c.dev_addr, i2c.data);
                error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
                break;
        }
        default:
                IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
#define IXGBE_MHADD_MFS_SHIFT 16
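/* The maximum frame size field occupies the upper 16 bits of MHADD */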

static void
ixgbe_init_locked(struct adapter *adapter)
{
        struct ifnet   *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        struct ixgbe_hw *hw = &adapter->hw;
        u32             k, txdctl, mhadd, gpie;
        u32             rxdctl, rxctrl;

        mtx_assert(&adapter->core_mtx, MA_OWNED);
        INIT_DEBUGOUT("ixgbe_init_locked: begin");
        hw->adapter_stopped = FALSE;
        ixgbe_stop_adapter(hw);
        callout_stop(&adapter->timer);

        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
              IXGBE_ETH_LENGTH_OF_ADDRESS);
        ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
        hw->addr_ctrl.rar_used_count = 1;

        /* Set the various hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (hw->mac.type != ixgbe_mac_82598EB)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        /* Prepare transmit descriptors and buffers */
        if (ixgbe_setup_transmit_structures(adapter)) {
                device_printf(dev, "Could not setup transmit structures\n");
                ixgbe_stop(adapter);
                return;
        }

        ixgbe_init_hw(hw);
        ixgbe_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        ixgbe_set_multi(adapter);

        /*
        ** Determine the correct mbuf pool
        ** for doing jumbo frames
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else if (adapter->max_frame_size <= 9216)
                adapter->rx_mbuf_sz = MJUM9BYTES;
        else
                adapter->rx_mbuf_sz = MJUM16BYTES;

        /* Prepare receive descriptors and buffers */
        if (ixgbe_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                ixgbe_stop(adapter);
                return;
        }

        /* Configure RX settings */
        ixgbe_initialize_receive_units(adapter);

        gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

        /* Enable Fan Failure Interrupt */
        gpie |= IXGBE_SDP1_GPIEN;

        /* Add for Module detection */
        if (hw->mac.type == ixgbe_mac_82599EB)
                gpie |= IXGBE_SDP2_GPIEN;

        /* Thermal Failure Detection */
        if (hw->mac.type == ixgbe_mac_X540)
                gpie |= IXGBE_SDP0_GPIEN;

        if (adapter->msix > 1) {
                /* Enable Enhanced MSIX mode */
                gpie |= IXGBE_GPIE_MSIX_MODE;
                gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
                    IXGBE_GPIE_OCD;
        }
        IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

        /* Set MTU size */
        if (ifp->if_mtu > ETHERMTU) {
                mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
                mhadd &= ~IXGBE_MHADD_MFS_MASK;
                mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
                IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
        }

        /* Now enable all the queues */
        for (int i = 0; i < adapter->num_queues; i++) {
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                /* Set WTHRESH to 8, burst writeback */
                txdctl |= (8 << 16);
                /*
                 * When the internal queue falls below PTHRESH (32),
                 * start prefetching as long as there are at least
                 * HTHRESH (1) buffers ready. The values are taken
                 * from the Intel linux driver 3.8.21.
                 * Prefetching enables tx line rate even with 1 queue.
                 */
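                /*
                 * TXDCTL packing (per my reading of the datasheet):
                 * PTHRESH bits 6:0, HTHRESH bits 14:8, WTHRESH bits
                 * 22:16, matching the <<0, <<8 and <<16 shifts here.
                 */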
                txdctl |= (32 << 0) | (1 << 8);
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
        }

        for (int i = 0; i < adapter->num_queues; i++) {
                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
                if (hw->mac.type == ixgbe_mac_82598EB) {
                        /*
                        ** PTHRESH = 21
                        ** HTHRESH = 4
                        ** WTHRESH = 8
                        */
                        rxdctl &= ~0x3FFFFF;
                        rxdctl |= 0x080420;
                }
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
                for (k = 0; k < 10; k++) {
                        if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
                            IXGBE_RXDCTL_ENABLE)
                                break;
                        else
                                msec_delay(1);
                }
                wmb();
#ifdef DEV_NETMAP
                /*
                 * In netmap mode, we must preserve the buffers made
                 * available to userspace before the if_init()
                 * (this is true by default on the TX side, because
                 * init makes all buffers available to userspace).
                 *
                 * netmap_reset() and the device specific routines
                 * (e.g. ixgbe_setup_receive_rings()) map these
                 * buffers at the end of the NIC ring, so here we
                 * must set the RDT (tail) register to make sure
                 * they are not overwritten.
                 *
                 * In this driver the NIC ring starts at RDH = 0,
                 * RDT points to the last slot available for reception (?),
                 * so RDT = num_rx_desc - 1 means the whole ring is available.
                 */
                if (ifp->if_capenable & IFCAP_NETMAP) {
                        struct netmap_adapter *na = NA(adapter->ifp);
                        struct netmap_kring *kring = &na->rx_rings[i];
                        int t = na->num_rx_desc - 1 - kring->nr_hwavail;

                        IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
                } else
#endif /* DEV_NETMAP */
                IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
        }

        /* Set up VLAN support and filter */
        ixgbe_setup_vlan_hw_support(adapter);

        /* Enable Receive engine */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
        if (hw->mac.type == ixgbe_mac_82598EB)
                rxctrl |= IXGBE_RXCTRL_DMBYPS;
        rxctrl |= IXGBE_RXCTRL_RXEN;
        ixgbe_enable_rx_dma(hw, rxctrl);

        callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

        /* Set up MSI/X routing */
        if (ixgbe_enable_msix) {
                ixgbe_configure_ivars(adapter);
                /* Set up auto-mask */
                if (hw->mac.type == ixgbe_mac_82598EB)
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
                else {
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
                }
        } else {  /* Simple settings for Legacy/MSI */
                ixgbe_set_ivar(adapter, 0, 0, 0);
                ixgbe_set_ivar(adapter, 0, 0, 1);
                IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
        }

#ifdef IXGBE_FDIR
        /* Init Flow director */
        if (hw->mac.type != ixgbe_mac_82598EB) {
                u32 hdrm = 32 << fdir_pballoc;

                hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
                ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
        }
#endif

        /*
        ** Check on any SFP devices that
        ** need to be kick-started
        */
        if (hw->phy.type == ixgbe_phy_none) {
                int err = hw->phy.ops.identify(hw);
                if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                        device_printf(dev,
                            "Unsupported SFP+ module type was detected.\n");
                        return;
                }
        }

        /* Set moderation on the Link interrupt */
        IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);

        /* Config/Enable Link */
        ixgbe_config_link(adapter);

        /* Hardware Packet Buffer & Flow Control setup */
        {
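                /*
                ** A note on the math below (my reading of the shared
                ** code macros): IXGBE_DV()/IXGBE_LOW_DV() compute a
                ** delay value in bit times and IXGBE_BT2KB() converts
                ** it to KB, so the high water mark is the Rx packet
                ** buffer size (RXPBSIZE in KB) minus that headroom.
                */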
                u32 rxpb, frame, size, tmp;

                frame = adapter->max_frame_size;

                /* Calculate High Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_DV_X540(frame, frame);
                else
                        tmp = IXGBE_DV(frame, frame);
                size = IXGBE_BT2KB(tmp);
                rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
                hw->fc.high_water[0] = rxpb - size;

                /* Now calculate Low Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_LOW_DV_X540(frame);
                else
                        tmp = IXGBE_LOW_DV(frame);
                hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

                hw->fc.requested_mode = adapter->fc;
                hw->fc.pause_time = IXGBE_FC_PAUSE;
                hw->fc.send_xon = TRUE;
        }
        /* Initialize the FC settings */
        ixgbe_start_hw(hw);

        /* And now turn on interrupts */
        ixgbe_enable_intr(adapter);

        /* Now inform the stack we're ready */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;

        return;
}

static void
ixgbe_init(void *arg)
{
        struct adapter *adapter = arg;

        IXGBE_CORE_LOCK(adapter);
        ixgbe_init_locked(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return;
}

/*
**
** MSIX Interrupt Handlers and Tasklets
**
*/

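/*
** On 82598 a single 32-bit EIMS/EIMC register covers all queue
** vectors; later MACs split the 64-bit queue mask across the
** EIMS_EX/EIMC_EX pair, index 0 holding the low 32 bits and
** index 1 the high 32, as the two helpers below do.
*/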
static inline void
ixgbe_enable_queue(struct adapter *adapter, u32 vector)
{
        struct ixgbe_hw *hw = &adapter->hw;
        u64     queue = (u64)(1ULL << vector);
        u32     mask;

        if (hw->mac.type == ixgbe_mac_82598EB) {
                mask = (IXGBE_EIMS_RTX_QUEUE & queue);
                IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
        } else {
                mask = (queue & 0xFFFFFFFF);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
                mask = (queue >> 32);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
        }
}

static inline void
ixgbe_disable_queue(struct adapter *adapter, u32 vector)
{
        struct ixgbe_hw *hw = &adapter->hw;
        u64     queue = (u64)(1ULL << vector);
        u32     mask;

        if (hw->mac.type == ixgbe_mac_82598EB) {
                mask = (IXGBE_EIMS_RTX_QUEUE & queue);
                IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
        } else {
                mask = (queue & 0xFFFFFFFF);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
                mask = (queue >> 32);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
        }
}
1385
1386 static void
1387 ixgbe_handle_que(void *context, int pending)
1388 {
1389         struct ix_queue *que = context;
1390         struct adapter  *adapter = que->adapter;
1391         struct tx_ring  *txr = que->txr;
1392         struct ifnet    *ifp = adapter->ifp;
1393         bool            more;
1394
1395         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1396                 more = ixgbe_rxeof(que);
1397                 IXGBE_TX_LOCK(txr);
1398                 ixgbe_txeof(txr);
1399 #ifndef IXGBE_LEGACY_TX
1400                 if (!drbr_empty(ifp, txr->br))
1401                         ixgbe_mq_start_locked(ifp, txr);
1402 #else
1403                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1404                         ixgbe_start_locked(txr, ifp);
1405 #endif
1406                 IXGBE_TX_UNLOCK(txr);
1407         }
1408
1409         /* Reenable this interrupt */
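        /*
        ** que->res is only set when MSIX vectors are allocated (see
        ** ixgbe_allocate_msix()); under MSI or legacy interrupts the
        ** single shared enable is used instead.
        */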
1410         if (que->res != NULL)
1411                 ixgbe_enable_queue(adapter, que->msix);
1412         else
1413                 ixgbe_enable_intr(adapter);
1414         return;
1415 }
1416
1417
1418 /*********************************************************************
1419  *
1420  *  Legacy Interrupt Service routine
1421  *
1422  **********************************************************************/
1423
1424 static void
1425 ixgbe_legacy_irq(void *arg)
1426 {
1427         struct ix_queue *que = arg;
1428         struct adapter  *adapter = que->adapter;
1429         struct ixgbe_hw *hw = &adapter->hw;
1430         struct ifnet    *ifp = adapter->ifp;
1431         struct          tx_ring *txr = adapter->tx_rings;
1432         bool            more;
1433         u32             reg_eicr;
1434
1435
1436         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1437
1438         ++que->irqs;
1439         if (reg_eicr == 0) {
1440                 ixgbe_enable_intr(adapter);
1441                 return;
1442         }
1443
1444         more = ixgbe_rxeof(que);
1445
1446         IXGBE_TX_LOCK(txr);
1447         ixgbe_txeof(txr);
1448 #ifdef IXGBE_LEGACY_TX
1449         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1450                 ixgbe_start_locked(txr, ifp);
1451 #else
1452         if (!drbr_empty(ifp, txr->br))
1453                 ixgbe_mq_start_locked(ifp, txr);
1454 #endif
1455         IXGBE_TX_UNLOCK(txr);
1456
1457         /* Check for fan failure */
1458         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1459             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1460                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1461                     "REPLACE IMMEDIATELY!!\n");
1462                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1463         }
1464
1465         /* Link status change */
1466         if (reg_eicr & IXGBE_EICR_LSC)
1467                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1468
1469         if (more)
1470                 taskqueue_enqueue(que->tq, &que->que_task);
1471         else
1472                 ixgbe_enable_intr(adapter);
1473         return;
1474 }
1475
1476
1477 /*********************************************************************
1478  *
1479  *  MSIX Queue Interrupt Service routine
1480  *
1481  **********************************************************************/
1482 void
1483 ixgbe_msix_que(void *arg)
1484 {
1485         struct ix_queue *que = arg;
1486         struct adapter  *adapter = que->adapter;
1487         struct ifnet    *ifp = adapter->ifp;
1488         struct tx_ring  *txr = que->txr;
1489         struct rx_ring  *rxr = que->rxr;
1490         bool            more;
1491         u32             newitr = 0;
1492
1493         /* Protect against spurious interrupts */
1494         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1495                 return;
1496
1497         ixgbe_disable_queue(adapter, que->msix);
1498         ++que->irqs;
1499
1500         more = ixgbe_rxeof(que);
1501
1502         IXGBE_TX_LOCK(txr);
1503         ixgbe_txeof(txr);
1504 #ifdef IXGBE_LEGACY_TX
1505         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1506                 ixgbe_start_locked(txr, ifp);
1507 #else
1508         if (!drbr_empty(ifp, txr->br))
1509                 ixgbe_mq_start_locked(ifp, txr);
1510 #endif
1511         IXGBE_TX_UNLOCK(txr);
1512
1513         /* Do AIM now? */
1514
1515         if (ixgbe_enable_aim == FALSE)
1516                 goto no_calc;
1517         /*
1518         ** Do Adaptive Interrupt Moderation:
1519         **  - Write out last calculated setting
1520         **  - Calculate based on average size over
1521         **    the last interval.
1522         */
1523         if (que->eitr_setting)
1524                 IXGBE_WRITE_REG(&adapter->hw,
1525                     IXGBE_EITR(que->msix), que->eitr_setting);
1526  
1527         que->eitr_setting = 0;
1528
1529         /* Idle, do nothing */
1530         if ((txr->bytes == 0) && (rxr->bytes == 0))
1531                 goto no_calc;
1532                                 
1533         if ((txr->bytes) && (txr->packets))
1534                 newitr = txr->bytes/txr->packets;
1535         if ((rxr->bytes) && (rxr->packets))
1536                 newitr = max(newitr,
1537                     (rxr->bytes / rxr->packets));
1538         newitr += 24; /* account for hardware frame, crc */
1539
1540         /* set an upper boundary */
1541         newitr = min(newitr, 3000);
1542
1543         /* Be nice to the mid range */
1544         if ((newitr > 300) && (newitr < 1200))
1545                 newitr = (newitr / 3);
1546         else
1547                 newitr = (newitr / 2);
1548
1549         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1550                 newitr |= newitr << 16;
1551         else
1552                 newitr |= IXGBE_EITR_CNT_WDIS;
1553                  
1554         /* save for next interrupt */
1555         que->eitr_setting = newitr;
1556
1557         /* Reset state */
1558         txr->bytes = 0;
1559         txr->packets = 0;
1560         rxr->bytes = 0;
1561         rxr->packets = 0;
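
        /*
        ** Worked example of the AIM math above (illustrative): 100
        ** packets totalling 150000 bytes in the last interval give
        ** newitr = 1500 + 24 = 1524; that is capped at 3000, falls
        ** outside the 300-1200 mid range, and is halved to 762, which
        ** is written to EITR (OR'd with the counter-write-disable bit
        ** on non-82598 MACs) at the top of the next interrupt.
        */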
1562
1563 no_calc:
1564         if (more)
1565                 taskqueue_enqueue(que->tq, &que->que_task);
1566         else
1567                 ixgbe_enable_queue(adapter, que->msix);
1568         return;
1569 }
1570
1571
1572 static void
1573 ixgbe_msix_link(void *arg)
1574 {
1575         struct adapter  *adapter = arg;
1576         struct ixgbe_hw *hw = &adapter->hw;
1577         u32             reg_eicr;
1578
1579         ++adapter->link_irq;
1580
1581         /* First get the cause */
1582         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1583         /* Be sure the queue bits are not cleared */
1584         reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1585         /* Clear interrupt with write */
1586         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1587
1588         /* Link status change */
1589         if (reg_eicr & IXGBE_EICR_LSC)
1590                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1591
1592         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1593 #ifdef IXGBE_FDIR
1594                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1595                         /* This is probably overkill :) */
1596                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1597                                 return;
1598                         /* Disable the interrupt */
1599                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1600                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1601                 } else
1602 #endif
1603                 if (reg_eicr & IXGBE_EICR_ECC) {
1604                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1605                             "Please Reboot!!\n");
1606                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1607                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1610                         /* Clear the interrupt */
1611                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1612                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1613                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1614                         /* Clear the interrupt */
1615                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1616                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1617                 }
1618         } 
1619
1620         /* Check for fan failure */
1621         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1622             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1623                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1624                     "REPLACE IMMEDIATELY!!\n");
1625                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1626         }
1627
1628         /* Check for over temp condition */
1629         if ((hw->mac.type == ixgbe_mac_X540) &&
1630             (reg_eicr & IXGBE_EICR_TS)) {
1631                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1632                     "PHY IS SHUT DOWN!!\n");
1633                 device_printf(adapter->dev, "System shutdown required\n");
1634                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1635         }
1636
1637         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1638         return;
1639 }
1640
1641 /*********************************************************************
1642  *
1643  *  Media Ioctl callback
1644  *
1645  *  This routine is called whenever the user queries the status of
1646  *  the interface using ifconfig.
1647  *
1648  **********************************************************************/
1649 static void
1650 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1651 {
1652         struct adapter *adapter = ifp->if_softc;
1653
1654         INIT_DEBUGOUT("ixgbe_media_status: begin");
1655         IXGBE_CORE_LOCK(adapter);
1656         ixgbe_update_link_status(adapter);
1657
1658         ifmr->ifm_status = IFM_AVALID;
1659         ifmr->ifm_active = IFM_ETHER;
1660
1661         if (!adapter->link_active) {
1662                 IXGBE_CORE_UNLOCK(adapter);
1663                 return;
1664         }
1665
1666         ifmr->ifm_status |= IFM_ACTIVE;
1667
1668         switch (adapter->link_speed) {
1669                 case IXGBE_LINK_SPEED_100_FULL:
1670                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1671                         break;
1672                 case IXGBE_LINK_SPEED_1GB_FULL:
1673                         ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1674                         break;
1675                 case IXGBE_LINK_SPEED_10GB_FULL:
1676                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1677                         break;
1678         }
1679
1680         IXGBE_CORE_UNLOCK(adapter);
1681
1682         return;
1683 }
1684
1685 /*********************************************************************
1686  *
1687  *  Media Ioctl callback
1688  *
1689  *  This routine is called when the user changes speed/duplex using
1690  *  media/mediaopt options with ifconfig.
1691  *
1692  **********************************************************************/
1693 static int
1694 ixgbe_media_change(struct ifnet * ifp)
1695 {
1696         struct adapter *adapter = ifp->if_softc;
1697         struct ifmedia *ifm = &adapter->media;
1698
1699         INIT_DEBUGOUT("ixgbe_media_change: begin");
1700
1701         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1702                 return (EINVAL);
1703
1704         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1705         case IFM_AUTO:
1706                 adapter->hw.phy.autoneg_advertised =
1707                     IXGBE_LINK_SPEED_100_FULL |
1708                     IXGBE_LINK_SPEED_1GB_FULL |
1709                     IXGBE_LINK_SPEED_10GB_FULL;
1710                 break;
1711         default:
1712                 device_printf(adapter->dev, "Only autoselect media is supported\n");
1713                 return (EINVAL);
1714         }
1715
1716         return (0);
1717 }
1718
1719 /*********************************************************************
1720  *
1721  *  This routine maps the mbufs to tx descriptors, allowing the
1722  *  TX engine to transmit the packets. 
1723  *      - return 0 on success, positive on failure
1724  *
1725  **********************************************************************/
1726
1727 static int
1728 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1729 {
1730         struct adapter  *adapter = txr->adapter;
1731         u32             olinfo_status = 0, cmd_type_len;
1732         int             i, j, error, nsegs;
1733         int             first;
1734         bool            remap = TRUE;
1735         struct mbuf     *m_head;
1736         bus_dma_segment_t segs[adapter->num_segs];
1737         bus_dmamap_t    map;
1738         struct ixgbe_tx_buf *txbuf;
1739         union ixgbe_adv_tx_desc *txd = NULL;
1740
1741         m_head = *m_headp;
1742
1743         /* Basic descriptor defines */
1744         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1745             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1746
1747         if (m_head->m_flags & M_VLANTAG)
1748                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1749
1750         /*
1751          * Important to capture the first descriptor
1752          * used because it will contain the index of
1753          * the one we tell the hardware to report back
1754          */
1755         first = txr->next_avail_desc;
1756         txbuf = &txr->tx_buffers[first];
1757         map = txbuf->map;
1758
1759         /*
1760          * Map the packet for DMA.
1761          */
1762 retry:
1763         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1764             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1765
1766         if (__predict_false(error)) {
1767                 struct mbuf *m;
1768
1769                 switch (error) {
1770                 case EFBIG:
1771                         /* Try it again? - one try */
1772                         if (remap == TRUE) {
1773                                 remap = FALSE;
1774                                 m = m_defrag(*m_headp, M_NOWAIT);
1775                                 if (m == NULL) {
1776                                         adapter->mbuf_defrag_failed++;
1777                                         m_freem(*m_headp);
1778                                         *m_headp = NULL;
1779                                         return (ENOBUFS);
1780                                 }
1781                                 *m_headp = m;
1782                                 goto retry;
1783                         } else
1784                                 return (error);
1785                 case ENOMEM:
1786                         txr->no_tx_dma_setup++;
1787                         return (error);
1788                 default:
1789                         txr->no_tx_dma_setup++;
1790                         m_freem(*m_headp);
1791                         *m_headp = NULL;
1792                         return (error);
1793                 }
1794         }
1795
1796         /* Make certain there are enough descriptors */
1797         if (nsegs > txr->tx_avail - 2) {
1798                 txr->no_desc_avail++;
1799                 bus_dmamap_unload(txr->txtag, map);
1800                 return (ENOBUFS);
1801         }
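        /*
        ** Note (assumption): the "- 2" above keeps headroom for the
        ** offload context descriptor consumed by ixgbe_tx_ctx_setup()
        ** just below, plus one slot so the ring never fills completely.
        */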
1802         m_head = *m_headp;
1803
1804         /*
1805         ** Set up the appropriate offload context
1806         ** this will consume the first descriptor
1807         */
1808         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1809         if (__predict_false(error)) {
1810                 if (error == ENOBUFS)
1811                         *m_headp = NULL;
1812                 return (error);
1813         }
1814
1815 #ifdef IXGBE_FDIR
1816         /* Do the flow director magic */
1817         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1818                 ++txr->atr_count;
1819                 if (txr->atr_count >= atr_sample_rate) {
1820                         ixgbe_atr(txr, m_head);
1821                         txr->atr_count = 0;
1822                 }
1823         }
1824 #endif
1825
1826         i = txr->next_avail_desc;
1827         for (j = 0; j < nsegs; j++) {
1828                 bus_size_t seglen;
1829                 bus_addr_t segaddr;
1830
1831                 txbuf = &txr->tx_buffers[i];
1832                 txd = &txr->tx_base[i];
1833                 seglen = segs[j].ds_len;
1834                 segaddr = htole64(segs[j].ds_addr);
1835
1836                 txd->read.buffer_addr = segaddr;
1837                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1838             cmd_type_len | seglen);
1839                 txd->read.olinfo_status = htole32(olinfo_status);
1840
1841                 if (++i == txr->num_desc)
1842                         i = 0;
1843         }
1844
1845         txd->read.cmd_type_len |=
1846             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
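        /*
        ** EOP marks this as the frame's final descriptor; RS asks the
        ** hardware to report completion status there, which the 'eop'
        ** pointer saved below lets ixgbe_txeof() check for.
        */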
1847         txr->tx_avail -= nsegs;
1848         txr->next_avail_desc = i;
1849
1850         txbuf->m_head = m_head;
1851         /*
1852         ** Here we swap the map so the last descriptor,
1853         ** which gets the completion interrupt has the
1854         ** real map, and the first descriptor gets the
1855         ** unused map from this descriptor.
1856         */
1857         txr->tx_buffers[first].map = txbuf->map;
1858         txbuf->map = map;
1859         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1860
1861         /* Set the EOP descriptor that will be marked done */
1862         txbuf = &txr->tx_buffers[first];
1863         txbuf->eop = txd;
1864
1865         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1866             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1867         /*
1868          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1869          * hardware that this frame is available to transmit.
1870          */
1871         ++txr->total_packets;
1872         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1873
1874         return (0);
1875
1876 }
1877
1878 static void
1879 ixgbe_set_promisc(struct adapter *adapter)
1880 {
1881         u_int32_t       reg_rctl;
1882         struct ifnet   *ifp = adapter->ifp;
1883         int             mcnt = 0;
1884
1885         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1886         reg_rctl &= (~IXGBE_FCTRL_UPE);
1887         if (ifp->if_flags & IFF_ALLMULTI)
1888                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1889         else {
1890                 struct  ifmultiaddr *ifma;
1891 #if __FreeBSD_version < 800000
1892                 IF_ADDR_LOCK(ifp);
1893 #else
1894                 if_maddr_rlock(ifp);
1895 #endif
1896                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1897                         if (ifma->ifma_addr->sa_family != AF_LINK)
1898                                 continue;
1899                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1900                                 break;
1901                         mcnt++;
1902                 }
1903 #if __FreeBSD_version < 800000
1904                 IF_ADDR_UNLOCK(ifp);
1905 #else
1906                 if_maddr_runlock(ifp);
1907 #endif
1908         }
1909         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1910                 reg_rctl &= (~IXGBE_FCTRL_MPE);
1911         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1912
1913         if (ifp->if_flags & IFF_PROMISC) {
1914                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1915                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1916         } else if (ifp->if_flags & IFF_ALLMULTI) {
1917                 reg_rctl |= IXGBE_FCTRL_MPE;
1918                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1919                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1920         }
1921         return;
1922 }
1923
1924
1925 /*********************************************************************
1926  *  Multicast Update
1927  *
1928  *  This routine is called whenever multicast address list is updated.
1929  *
1930  **********************************************************************/
1931 #define IXGBE_RAR_ENTRIES 16
1932
1933 static void
1934 ixgbe_set_multi(struct adapter *adapter)
1935 {
1936         u32     fctrl;
1937         u8      *mta;
1938         u8      *update_ptr;
1939         struct  ifmultiaddr *ifma;
1940         int     mcnt = 0;
1941         struct ifnet   *ifp = adapter->ifp;
1942
1943         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1944
1945         mta = adapter->mta;
1946         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1947             MAX_NUM_MULTICAST_ADDRESSES);
1948
1949 #if __FreeBSD_version < 800000
1950         IF_ADDR_LOCK(ifp);
1951 #else
1952         if_maddr_rlock(ifp);
1953 #endif
1954         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1955                 if (ifma->ifma_addr->sa_family != AF_LINK)
1956                         continue;
1957                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1958                         break;
1959                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1960                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1961                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1962                 mcnt++;
1963         }
1964 #if __FreeBSD_version < 800000
1965         IF_ADDR_UNLOCK(ifp);
1966 #else
1967         if_maddr_runlock(ifp);
1968 #endif
1969
1970         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1972         if (ifp->if_flags & IFF_PROMISC)
1973                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1974         else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
1975             ifp->if_flags & IFF_ALLMULTI) {
1976                 fctrl |= IXGBE_FCTRL_MPE;
1977                 fctrl &= ~IXGBE_FCTRL_UPE;
1978         } else
1979                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1980         
1981         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1982
1983         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
1984                 update_ptr = mta;
1985                 ixgbe_update_mc_addr_list(&adapter->hw,
1986                     update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1987         }
1988
1989         return;
1990 }
1991
1992 /*
1993  * This is an iterator function needed by the shared multicast
1994  * code. It feeds the shared code routine the addresses from
1995  * the array built in ixgbe_set_multi() one by one.
1996  */
1997 static u8 *
1998 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1999 {
2000         u8 *addr = *update_ptr;
2001         u8 *newptr;
2002         *vmdq = 0;
2003
2004         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2005         *update_ptr = newptr;
2006         return addr;
2007 }
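
/*
** A minimal sketch of how the shared code consumes this iterator
** (assumed shape, not the actual shared-code source):
**
**	u8 *p = mta;
**	for (u32 i = 0; i < mcnt; i++) {
**		u32 vmdq;
**		u8 *addr = ixgbe_mc_array_itr(hw, &p, &vmdq);
**		... program addr into the hardware multicast table ...
**	}
*/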
2008
2009
2010 /*********************************************************************
2011  *  Timer routine
2012  *
2013  *  This routine checks for link status, updates statistics,
2014  *  and runs the watchdog check.
2015  *
2016  **********************************************************************/
2017
2018 static void
2019 ixgbe_local_timer(void *arg)
2020 {
2021         struct adapter  *adapter = arg;
2022         device_t        dev = adapter->dev;
2023         struct ix_queue *que = adapter->queues;
2024         struct tx_ring  *txr = adapter->tx_rings;
2025         int             hung = 0, paused = 0;
2026
2027         mtx_assert(&adapter->core_mtx, MA_OWNED);
2028
2029         /* Check for pluggable optics */
2030         if (adapter->sfp_probe)
2031                 if (!ixgbe_sfp_probe(adapter))
2032                         goto out; /* Nothing to do */
2033
2034         ixgbe_update_link_status(adapter);
2035         ixgbe_update_stats_counters(adapter);
2036
2037         /*
2038          * If the interface has been paused
2039          * then don't do the watchdog check
2040          */
2041         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2042                 paused = 1;
2043
2044         /*
2045         ** Check the TX queues status
2046         **      - watchdog only if all queues show hung
2047         */          
2048         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2049                 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2050                     (paused == 0))
2051                         ++hung;
2052                 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2053                         taskqueue_enqueue(que->tq, &txr->txq_task);
2054         }
2055         /* Only truly watchdog if all queues show hung */
2056         if (hung == adapter->num_queues)
2057                 goto watchdog;
2058
2059 out:
2060         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2061         return;
2062
2063 watchdog:
2064         device_printf(dev, "Watchdog timeout -- resetting\n");
2065         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2066             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2067             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2068         device_printf(dev, "TX(%d) desc avail = %d, "
2069             "Next TX to Clean = %d\n",
2070             txr->me, txr->tx_avail, txr->next_to_clean);
2071         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2072         adapter->watchdog_events++;
2073         ixgbe_init_locked(adapter);
2074 }
2075
2076 /*
2077 ** Note: this routine updates the OS on the link state;
2078 **      the real check of the hardware only happens with
2079 **      a link interrupt.
2080 */
2081 static void
2082 ixgbe_update_link_status(struct adapter *adapter)
2083 {
2084         struct ifnet    *ifp = adapter->ifp;
2085         device_t dev = adapter->dev;
2086
2087
2088         if (adapter->link_up){ 
2089                 if (adapter->link_active == FALSE) {
2090                         if (bootverbose)
2091                                 device_printf(dev, "Link is up %d Gbps %s\n",
2092                                     ((adapter->link_speed == 128)? 10:1),
2093                                     "Full Duplex");
2094                         adapter->link_active = TRUE;
2095                         /* Update any Flow Control changes */
2096                         ixgbe_fc_enable(&adapter->hw);
2097                         if_link_state_change(ifp, LINK_STATE_UP);
2098                 }
2099         } else { /* Link down */
2100                 if (adapter->link_active == TRUE) {
2101                         if (bootverbose)
2102                                 device_printf(dev,"Link is Down\n");
2103                         if_link_state_change(ifp, LINK_STATE_DOWN);
2104                         adapter->link_active = FALSE;
2105                 }
2106         }
2107
2108         return;
2109 }
2110
2111
2112 /*********************************************************************
2113  *
2114  *  This routine disables all traffic on the adapter by issuing a
2115  *  global reset on the MAC and deallocates TX/RX buffers.
2116  *
2117  **********************************************************************/
2118
2119 static void
2120 ixgbe_stop(void *arg)
2121 {
2122         struct ifnet   *ifp;
2123         struct adapter *adapter = arg;
2124         struct ixgbe_hw *hw = &adapter->hw;
2125         ifp = adapter->ifp;
2126
2127         mtx_assert(&adapter->core_mtx, MA_OWNED);
2128
2129         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2130         ixgbe_disable_intr(adapter);
2131         callout_stop(&adapter->timer);
2132
2133         /* Let the stack know...*/
2134         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2135
2136         ixgbe_reset_hw(hw);
2137         hw->adapter_stopped = FALSE;
2138         ixgbe_stop_adapter(hw);
2139         if (hw->mac.type == ixgbe_mac_82599EB)
2140                 ixgbe_stop_mac_link_on_d3_82599(hw);
2141         /* Turn off the laser - noop with no optics */
2142         ixgbe_disable_tx_laser(hw);
2143
2144         /* Update the stack */
2145         adapter->link_up = FALSE;
2146         ixgbe_update_link_status(adapter);
2147
2148         /* reprogram the RAR[0] in case user changed it. */
2149         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2150
2151         return;
2152 }
2153
2154
2155 /*********************************************************************
2156  *
2157  *  Determine hardware revision.
2158  *
2159  **********************************************************************/
2160 static void
2161 ixgbe_identify_hardware(struct adapter *adapter)
2162 {
2163         device_t        dev = adapter->dev;
2164         struct ixgbe_hw *hw = &adapter->hw;
2165
2166         /* Save off the information about this board */
2167         hw->vendor_id = pci_get_vendor(dev);
2168         hw->device_id = pci_get_device(dev);
2169         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2170         hw->subsystem_vendor_id =
2171             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2172         hw->subsystem_device_id =
2173             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2174
2175         /* We need this here to set the num_segs below */
2176         ixgbe_set_mac_type(hw);
2177
2178         /* Pick up the 82599 and VF settings */
2179         if (hw->mac.type != ixgbe_mac_82598EB) {
2180                 hw->phy.smart_speed = ixgbe_smart_speed;
2181                 adapter->num_segs = IXGBE_82599_SCATTER;
2182         } else
2183                 adapter->num_segs = IXGBE_82598_SCATTER;
2184
2185         return;
2186 }
2187
2188 /*********************************************************************
2189  *
2190  *  Determine optic type
2191  *
2192  **********************************************************************/
2193 static void
2194 ixgbe_setup_optics(struct adapter *adapter)
2195 {
2196         struct ixgbe_hw *hw = &adapter->hw;
2197         int             layer;
2198
2199         layer = ixgbe_get_supported_physical_layer(hw);
2200
2201         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2202                 adapter->optics = IFM_10G_T;
2203                 return;
2204         }
2205
2206         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2207                 adapter->optics = IFM_1000_T;
2208                 return;
2209         }
2210
2211         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2212                 adapter->optics = IFM_1000_SX;
2213                 return;
2214         }
2215
2216         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2217             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2218                 adapter->optics = IFM_10G_LR;
2219                 return;
2220         }
2221
2222         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2223                 adapter->optics = IFM_10G_SR;
2224                 return;
2225         }
2226
2227         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2228                 adapter->optics = IFM_10G_TWINAX;
2229                 return;
2230         }
2231
2232         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2233             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2234                 adapter->optics = IFM_10G_CX4;
2235                 return;
2236         }
2237
2238         /* If we get here just set the default */
2239         adapter->optics = IFM_ETHER | IFM_AUTO;
2240         return;
2241 }
2242
2243 /*********************************************************************
2244  *
2245  *  Setup the Legacy or MSI Interrupt handler
2246  *
2247  **********************************************************************/
2248 static int
2249 ixgbe_allocate_legacy(struct adapter *adapter)
2250 {
2251         device_t        dev = adapter->dev;
2252         struct          ix_queue *que = adapter->queues;
2253 #ifndef IXGBE_LEGACY_TX
2254         struct tx_ring          *txr = adapter->tx_rings;
2255 #endif
2256         int             error, rid = 0;
2257
2258         /* MSI RID at 1 */
2259         if (adapter->msix == 1)
2260                 rid = 1;
2261
2262         /* We allocate a single interrupt resource */
2263         adapter->res = bus_alloc_resource_any(dev,
2264             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2265         if (adapter->res == NULL) {
2266                 device_printf(dev, "Unable to allocate bus resource: "
2267                     "interrupt\n");
2268                 return (ENXIO);
2269         }
2270
2271         /*
2272          * Try allocating a fast interrupt and the associated deferred
2273          * processing contexts.
2274          */
2275 #ifndef IXGBE_LEGACY_TX
2276         TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2277 #endif
2278         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2279         que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2280             taskqueue_thread_enqueue, &que->tq);
2281         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
2282             device_get_nameunit(adapter->dev));
2283
2284         /* Tasklets for Link, SFP and Multispeed Fiber */
2285         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2286         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2287         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2288 #ifdef IXGBE_FDIR
2289         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2290 #endif
2291         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2292             taskqueue_thread_enqueue, &adapter->tq);
2293         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2294             device_get_nameunit(adapter->dev));
2295
2296         if ((error = bus_setup_intr(dev, adapter->res,
2297             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
2298             que, &adapter->tag)) != 0) {
2299                 device_printf(dev, "Failed to register fast interrupt "
2300                     "handler: %d\n", error);
2301                 taskqueue_free(que->tq);
2302                 taskqueue_free(adapter->tq);
2303                 que->tq = NULL;
2304                 adapter->tq = NULL;
2305                 return (error);
2306         }
2307         /* For simplicity in the handlers */
2308         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2309
2310         return (0);
2311 }
2312
2313
2314 /*********************************************************************
2315  *
2316  *  Setup MSIX Interrupt resources and handlers 
2317  *
2318  **********************************************************************/
2319 static int
2320 ixgbe_allocate_msix(struct adapter *adapter)
2321 {
2322         device_t        dev = adapter->dev;
2323         struct          ix_queue *que = adapter->queues;
2324         struct          tx_ring *txr = adapter->tx_rings;
2325         int             error, rid, vector = 0;
2326
2327         for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2328                 rid = vector + 1;
2329                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2330                     RF_SHAREABLE | RF_ACTIVE);
2331                 if (que->res == NULL) {
2332                         device_printf(dev,"Unable to allocate"
2333                             " bus resource: que interrupt [%d]\n", vector);
2334                         return (ENXIO);
2335                 }
2336                 /* Set the handler function */
2337                 error = bus_setup_intr(dev, que->res,
2338                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2339                     ixgbe_msix_que, que, &que->tag);
2340                 if (error) {
2341                         que->res = NULL;
2342                         device_printf(dev, "Failed to register QUE handler");
2343                         return (error);
2344                 }
2345 #if __FreeBSD_version >= 800504
2346                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2347 #endif
2348                 que->msix = vector;
2349                 adapter->que_mask |= (u64)1 << que->msix;
2350                 /*
2351                 ** Bind the msix vector, and thus the
2352                 ** ring to the corresponding cpu.
2353                 */
2354                 if (adapter->num_queues > 1)
2355                         bus_bind_intr(dev, que->res, i);
2356
2357 #ifndef IXGBE_LEGACY_TX
2358                 TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2359 #endif
2360                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2361                 que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2362                     taskqueue_thread_enqueue, &que->tq);
2363                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2364                     device_get_nameunit(adapter->dev));
2365         }
2366
2367         /* and Link */
2368         rid = vector + 1;
2369         adapter->res = bus_alloc_resource_any(dev,
2370             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2371         if (!adapter->res) {
2372                 device_printf(dev,"Unable to allocate"
2373             " bus resource: Link interrupt [%d]\n", rid);
2374                 return (ENXIO);
2375         }
2376         /* Set the link handler function */
2377         error = bus_setup_intr(dev, adapter->res,
2378             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2379             ixgbe_msix_link, adapter, &adapter->tag);
2380         if (error) {
2381                 adapter->res = NULL;
2382                 device_printf(dev, "Failed to register LINK handler");
2383                 return (error);
2384         }
2385 #if __FreeBSD_version >= 800504
2386         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2387 #endif
2388         adapter->linkvec = vector;
2389         /* Tasklets for Link, SFP and Multispeed Fiber */
2390         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2391         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2392         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2393 #ifdef IXGBE_FDIR
2394         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2395 #endif
2396         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2397             taskqueue_thread_enqueue, &adapter->tq);
2398         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2399             device_get_nameunit(adapter->dev));
2400
2401         return (0);
2402 }
2403
2404 /*
2405  * Setup Either MSI/X or MSI
2406  */
2407 static int
2408 ixgbe_setup_msix(struct adapter *adapter)
2409 {
2410         device_t dev = adapter->dev;
2411         int rid, want, queues, msgs;
2412
2413         /* Override by tuneable */
2414         if (ixgbe_enable_msix == 0)
2415                 goto msi;
2416
2417         /* First try MSI/X */
2418         rid = PCIR_BAR(MSIX_82598_BAR);
2419         adapter->msix_mem = bus_alloc_resource_any(dev,
2420             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2421         if (!adapter->msix_mem) {
2422                 rid += 4;       /* 82599 maps in higher BAR */
2423                 adapter->msix_mem = bus_alloc_resource_any(dev,
2424                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2425         }
2426         if (!adapter->msix_mem) {
2427                 /* May not be enabled */
2428                 device_printf(adapter->dev,
2429                     "Unable to map MSIX table\n");
2430                 goto msi;
2431         }
2432
2433         msgs = pci_msix_count(dev); 
2434         if (msgs == 0) { /* system has msix disabled */
2435                 bus_release_resource(dev, SYS_RES_MEMORY,
2436                     rid, adapter->msix_mem);
2437                 adapter->msix_mem = NULL;
2438                 goto msi;
2439         }
2440
2441         /* Figure out a reasonable auto config value */
2442         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2443
2444         if (ixgbe_num_queues != 0)
2445                 queues = ixgbe_num_queues;
2446         /* Set max queues to 8 when autoconfiguring */
2447         else if (queues > 8)
2448                 queues = 8;
2449
2450         /*
2451         ** Want one vector (RX/TX pair) per queue
2452         ** plus an additional for Link.
2453         */
2454         want = queues + 1;
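        /*
        ** Example: 8 queues on an 8-CPU box -> want = 9 vectors,
        ** one RX/TX pair per queue plus one for link.
        */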
2455         if (msgs >= want)
2456                 msgs = want;
2457         else {
2458                 device_printf(adapter->dev,
2459                     "MSIX Configuration Problem, "
2460                     "%d vectors available but %d wanted!\n",
2461                     msgs, want);
2462                 return (0); /* Will go to Legacy setup */
2463         }
2464         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2465                 device_printf(adapter->dev,
2466                     "Using MSIX interrupts with %d vectors\n", msgs);
2467                 adapter->num_queues = queues;
2468                 return (msgs);
2469         }
2470 msi:
2471         msgs = pci_msi_count(dev);
2472         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2473                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2474         else
2475                 device_printf(adapter->dev,"Using a Legacy interrupt\n");
2476         return (msgs);
2477 }
2478
2479
2480 static int
2481 ixgbe_allocate_pci_resources(struct adapter *adapter)
2482 {
2483         int             rid;
2484         device_t        dev = adapter->dev;
2485
2486         rid = PCIR_BAR(0);
2487         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2488             &rid, RF_ACTIVE);
2489
2490         if (!(adapter->pci_mem)) {
2491                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2492                 return (ENXIO);
2493         }
2494
2495         adapter->osdep.mem_bus_space_tag =
2496                 rman_get_bustag(adapter->pci_mem);
2497         adapter->osdep.mem_bus_space_handle =
2498                 rman_get_bushandle(adapter->pci_mem);
2499         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2500
2501         /* Legacy defaults */
2502         adapter->num_queues = 1;
2503         adapter->hw.back = &adapter->osdep;
2504
2505         /*
2506         ** Now set up MSI or MSI/X; this should return
2507         ** the number of supported vectors
2508         ** (which will be 1 for MSI).
2509         */
2510         adapter->msix = ixgbe_setup_msix(adapter);
2511         return (0);
2512 }
2513
2514 static void
2515 ixgbe_free_pci_resources(struct adapter * adapter)
2516 {
2517         struct          ix_queue *que = adapter->queues;
2518         device_t        dev = adapter->dev;
2519         int             rid, memrid;
2520
2521         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2522                 memrid = PCIR_BAR(MSIX_82598_BAR);
2523         else
2524                 memrid = PCIR_BAR(MSIX_82599_BAR);
2525
2526         /*
2527         ** There is a slight possibility of a failure mode
2528         ** in attach that will result in entering this function
2529         ** before interrupt resources have been initialized, and
2530         ** in that case we do not want to execute the loops below.
2531         ** We can detect this reliably by the state of the adapter
2532         ** res pointer.
2533         */
2534         if (adapter->res == NULL)
2535                 goto mem;
2536
2537         /*
2538         **  Release all msix queue resources:
2539         */
2540         for (int i = 0; i < adapter->num_queues; i++, que++) {
2541                 rid = que->msix + 1;
2542                 if (que->tag != NULL) {
2543                         bus_teardown_intr(dev, que->res, que->tag);
2544                         que->tag = NULL;
2545                 }
2546                 if (que->res != NULL)
2547                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2548         }
2549
2550
2551         /* Clean the Legacy or Link interrupt last */
2552         if (adapter->linkvec) /* we are doing MSIX */
2553                 rid = adapter->linkvec + 1;
2554         else
2555                 rid = (adapter->msix != 0) ? 1 : 0;
2556
2557         if (adapter->tag != NULL) {
2558                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2559                 adapter->tag = NULL;
2560         }
2561         if (adapter->res != NULL)
2562                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2563
2564 mem:
2565         if (adapter->msix)
2566                 pci_release_msi(dev);
2567
2568         if (adapter->msix_mem != NULL)
2569                 bus_release_resource(dev, SYS_RES_MEMORY,
2570                     memrid, adapter->msix_mem);
2571
2572         if (adapter->pci_mem != NULL)
2573                 bus_release_resource(dev, SYS_RES_MEMORY,
2574                     PCIR_BAR(0), adapter->pci_mem);
2575
2576         return;
2577 }
2578
2579 /*********************************************************************
2580  *
2581  *  Setup networking device structure and register an interface.
2582  *
2583  **********************************************************************/
2584 static int
2585 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2586 {
2587         struct ixgbe_hw *hw = &adapter->hw;
2588         struct ifnet   *ifp;
2589
2590         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2591
2592         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2593         if (ifp == NULL) {
2594                 device_printf(dev, "cannot allocate ifnet structure\n");
2595                 return (-1);
2596         }
2597         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2598 #if __FreeBSD_version < 1000025
2599         ifp->if_baudrate = 1000000000;
2600 #else
2601         if_initbaudrate(ifp, IF_Gbps(10));
2602 #endif
2603         ifp->if_init = ixgbe_init;
2604         ifp->if_softc = adapter;
2605         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2606         ifp->if_ioctl = ixgbe_ioctl;
2607 #ifndef IXGBE_LEGACY_TX
2608         ifp->if_transmit = ixgbe_mq_start;
2609         ifp->if_qflush = ixgbe_qflush;
2610 #else
2611         ifp->if_start = ixgbe_start;
2612         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2613         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2614         IFQ_SET_READY(&ifp->if_snd);
2615 #endif
2616
2617         ether_ifattach(ifp, adapter->hw.mac.addr);
2618
2619         adapter->max_frame_size =
2620             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2621
2622         /*
2623          * Tell the upper layer(s) we support long frames.
2624          */
2625         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2626
2627         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2628         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2629         ifp->if_capabilities |= IFCAP_LRO;
2630         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2631                              |  IFCAP_VLAN_HWTSO
2632                              |  IFCAP_VLAN_MTU;
2633         ifp->if_capenable = ifp->if_capabilities;
2634
2635         /*
2636         ** Don't turn this on by default; if vlans are
2637         ** created on another pseudo device (e.g. lagg)
2638         ** then vlan events are not passed thru, breaking
2639         ** operation, but with HW FILTER off it works. If
2640         ** using vlans directly on the ixgbe driver you can
2641         ** enable this and get full hardware tag filtering.
2642         */
2643         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2644
2645         /*
2646          * Specify the media types supported by this adapter and register
2647          * callbacks to update media and link information
2648          */
2649         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2650                      ixgbe_media_status);
2651         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2652         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2653         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2654                 ifmedia_add(&adapter->media,
2655                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2656                 ifmedia_add(&adapter->media,
2657                     IFM_ETHER | IFM_1000_T, 0, NULL);
2658         }
2659         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2660         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2661
2662         return (0);
2663 }
2664
2665 static void
2666 ixgbe_config_link(struct adapter *adapter)
2667 {
2668         struct ixgbe_hw *hw = &adapter->hw;
2669         u32     autoneg, err = 0;
2670         bool    sfp, negotiate;
2671
2672         sfp = ixgbe_is_sfp(hw);
2673
2674         if (sfp) { 
2675                 if (hw->phy.multispeed_fiber) {
2676                         hw->mac.ops.setup_sfp(hw);
2677                         ixgbe_enable_tx_laser(hw);
2678                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2679                 } else
2680                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2681         } else {
2682                 if (hw->mac.ops.check_link)
2683                         err = ixgbe_check_link(hw, &adapter->link_speed,
2684                             &adapter->link_up, FALSE);
2685                 if (err)
2686                         goto out;
2687                 autoneg = hw->phy.autoneg_advertised;
2688                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2689                         err  = hw->mac.ops.get_link_capabilities(hw,
2690                             &autoneg, &negotiate);
2691                 if (err)
2692                         goto out;
2693                 if (hw->mac.ops.setup_link)
2694                         err = hw->mac.ops.setup_link(hw,
2695                             autoneg, adapter->link_up);
2696         }
2697 out:
2698         return;
2699 }
2700
2701 /********************************************************************
2702  * Manage DMA'able memory.
2703  *******************************************************************/
2704 static void
2705 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2706 {
2707         if (error)
2708                 return;
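        /*
        ** Tags built by ixgbe_dma_malloc() below use nsegments = 1,
        ** so the single segment covers the whole allocation.
        */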
2709         *(bus_addr_t *) arg = segs->ds_addr;
2710         return;
2711 }
2712
2713 static int
2714 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2715                 struct ixgbe_dma_alloc *dma, int mapflags)
2716 {
2717         device_t dev = adapter->dev;
2718         int             r;
2719
2720         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2721                                DBA_ALIGN, 0,    /* alignment, bounds */
2722                                BUS_SPACE_MAXADDR,       /* lowaddr */
2723                                BUS_SPACE_MAXADDR,       /* highaddr */
2724                                NULL, NULL,      /* filter, filterarg */
2725                                size,    /* maxsize */
2726                                1,       /* nsegments */
2727                                size,    /* maxsegsize */
2728                                BUS_DMA_ALLOCNOW,        /* flags */
2729                                NULL,    /* lockfunc */
2730                                NULL,    /* lockfuncarg */
2731                                &dma->dma_tag);
2732         if (r != 0) {
2733                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2734                        "error %u\n", r);
2735                 goto fail_0;
2736         }
2737         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2738                              BUS_DMA_NOWAIT, &dma->dma_map);
2739         if (r != 0) {
2740                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2741                        "error %u\n", r);
2742                 goto fail_1;
2743         }
2744         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2745                             size,
2746                             ixgbe_dmamap_cb,
2747                             &dma->dma_paddr,
2748                             mapflags | BUS_DMA_NOWAIT);
2749         if (r != 0) {
2750                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2751                        "error %u\n", r);
2752                 goto fail_2;
2753         }
2754         dma->dma_size = size;
2755         return (0);
2756 fail_2:
2757         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2758 fail_1:
2759         bus_dma_tag_destroy(dma->dma_tag);
2760 fail_0:
2761         dma->dma_map = NULL;
2762         dma->dma_tag = NULL;
2763         return (r);
2764 }
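
/*
** Typical use, as in ixgbe_allocate_queues() below -- allocate the
** descriptor ring storage and key off the return value:
**
**	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
**		goto err_tx_desc;
**
** ixgbe_dma_free() is the matching teardown on the unwind path.
*/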
2765
2766 static void
2767 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2768 {
2769         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2770             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2771         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2772         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2773         bus_dma_tag_destroy(dma->dma_tag);
2774 }
2775
2776
2777 /*********************************************************************
2778  *
2779  *  Allocate memory for the transmit and receive rings, and then
2780  *  the descriptors associated with each, called only once at attach.
2781  *
2782  **********************************************************************/
2783 static int
2784 ixgbe_allocate_queues(struct adapter *adapter)
2785 {
2786         device_t        dev = adapter->dev;
2787         struct ix_queue *que;
2788         struct tx_ring  *txr;
2789         struct rx_ring  *rxr;
2790         int rsize, tsize, error = IXGBE_SUCCESS;
2791         int txconf = 0, rxconf = 0;
2792
2793         /* First allocate the top level queue structs */
2794         if (!(adapter->queues =
2795             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2796             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2797                 device_printf(dev, "Unable to allocate queue memory\n");
2798                 error = ENOMEM;
2799                 goto fail;
2800         }
2801
2802         /* Next allocate the TX ring struct memory */
2803         if (!(adapter->tx_rings =
2804             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2805             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2806                 device_printf(dev, "Unable to allocate TX ring memory\n");
2807                 error = ENOMEM;
2808                 goto tx_fail;
2809         }
2810
2811         /* Next allocate the RX */
2812         if (!(adapter->rx_rings =
2813             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2814             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2815                 device_printf(dev, "Unable to allocate RX ring memory\n");
2816                 error = ENOMEM;
2817                 goto rx_fail;
2818         }
2819
2820         /* For the ring itself */
2821         tsize = roundup2(adapter->num_tx_desc *
2822             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2823
2824         /*
2825          * Now set up the TX queues, txconf is needed to handle the
2826          * possibility that things fail midcourse and we need to
2827          * undo memory gracefully
2828          */ 
2829         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2830                 /* Set up some basics */
2831                 txr = &adapter->tx_rings[i];
2832                 txr->adapter = adapter;
2833                 txr->me = i;
2834                 txr->num_desc = adapter->num_tx_desc;
2835
2836                 /* Initialize the TX side lock */
2837                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2838                     device_get_nameunit(dev), txr->me);
2839                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2840
2841                 if (ixgbe_dma_malloc(adapter, tsize,
2842                         &txr->txdma, BUS_DMA_NOWAIT)) {
2843                         device_printf(dev,
2844                             "Unable to allocate TX Descriptor memory\n");
2845                         error = ENOMEM;
2846                         goto err_tx_desc;
2847                 }
2848                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2849                 bzero((void *)txr->tx_base, tsize);
2850
2851                 /* Now allocate transmit buffers for the ring */
2852                 if (ixgbe_allocate_transmit_buffers(txr)) {
2853                         device_printf(dev,
2854                             "Critical Failure setting up transmit buffers\n");
2855                         error = ENOMEM;
2856                         goto err_tx_desc;
2857                 }
2858 #ifndef IXGBE_LEGACY_TX
2859                 /* Allocate a buf ring */
2860                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2861                     M_WAITOK, &txr->tx_mtx);
2862                 if (txr->br == NULL) {
2863                         device_printf(dev,
2864                             "Critical Failure setting up buf ring\n");
2865                         error = ENOMEM;
2866                         goto err_tx_desc;
2867                 }
2868 #endif
2869         }
2870
2871         /*
2872          * Next the RX queues...
2873          */ 
2874         rsize = roundup2(adapter->num_rx_desc *
2875             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2876         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2877                 rxr = &adapter->rx_rings[i];
2878                 /* Set up some basics */
2879                 rxr->adapter = adapter;
2880                 rxr->me = i;
2881                 rxr->num_desc = adapter->num_rx_desc;
2882
2883                 /* Initialize the RX side lock */
2884                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2885                     device_get_nameunit(dev), rxr->me);
2886                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2887
2888                 if (ixgbe_dma_malloc(adapter, rsize,
2889                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2890                         device_printf(dev,
2891                             "Unable to allocate RX Descriptor memory\n");
2892                         error = ENOMEM;
2893                         goto err_rx_desc;
2894                 }
2895                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2896                 bzero((void *)rxr->rx_base, rsize);
2897
2898                 /* Allocate receive buffers for the ring */
2899                 if (ixgbe_allocate_receive_buffers(rxr)) {
2900                         device_printf(dev,
2901                             "Critical Failure setting up receive buffers\n");
2902                         error = ENOMEM;
2903                         goto err_rx_desc;
2904                 }
2905         }
2906
2907         /*
2908         ** Finally set up the queue holding structs
2909         */
2910         for (int i = 0; i < adapter->num_queues; i++) {
2911                 que = &adapter->queues[i];
2912                 que->adapter = adapter;
2913                 que->txr = &adapter->tx_rings[i];
2914                 que->rxr = &adapter->rx_rings[i];
2915         }
2916
2917         return (0);
2918
2919 err_rx_desc:
2920         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2921                 ixgbe_dma_free(adapter, &rxr->rxdma);
2922 err_tx_desc:
2923         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2924                 ixgbe_dma_free(adapter, &txr->txdma);
2925         free(adapter->rx_rings, M_DEVBUF);
2926 rx_fail:
2927         free(adapter->tx_rings, M_DEVBUF);
2928 tx_fail:
2929         free(adapter->queues, M_DEVBUF);
2930 fail:
2931         return (error);
2932 }
2933
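/*
** The txconf/rxconf counters above implement a common unwind
** idiom: each counts the rings whose descriptor DMA succeeded,
** so the error path frees exactly that many and no more. A
** minimal standalone sketch of the pattern (names illustrative,
** not part of the driver):
*/
#if 0
#include <stdlib.h>

static int
alloc_all(void **slots, int n)
{
	int conf = 0;

	for (int i = 0; i < n; i++, conf++) {
		if ((slots[i] = malloc(64)) == NULL)
			goto err;
	}
	return (0);
err:
	/* Free only the 'conf' entries that were allocated */
	while (conf-- > 0)
		free(slots[conf]);
	return (-1);
}
#endif
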
2934 /*********************************************************************
2935  *
2936  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2937  *  the information needed to transmit a packet on the wire. This is
2938  *  called only once at attach; setup is done on every reset.
2939  *
2940  **********************************************************************/
2941 static int
2942 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2943 {
2944         struct adapter *adapter = txr->adapter;
2945         device_t dev = adapter->dev;
2946         struct ixgbe_tx_buf *txbuf;
2947         int error, i;
2948
2949         /*
2950          * Setup DMA descriptor areas.
2951          */
2952         if ((error = bus_dma_tag_create(
2953                                bus_get_dma_tag(adapter->dev),   /* parent */
2954                                1, 0,            /* alignment, bounds */
2955                                BUS_SPACE_MAXADDR,       /* lowaddr */
2956                                BUS_SPACE_MAXADDR,       /* highaddr */
2957                                NULL, NULL,              /* filter, filterarg */
2958                                IXGBE_TSO_SIZE,          /* maxsize */
2959                                adapter->num_segs,       /* nsegments */
2960                                PAGE_SIZE,               /* maxsegsize */
2961                                0,                       /* flags */
2962                                NULL,                    /* lockfunc */
2963                                NULL,                    /* lockfuncarg */
2964                                &txr->txtag))) {
2965                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2966                 goto fail;
2967         }
2968
2969         if (!(txr->tx_buffers =
2970             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
2971             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2972                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2973                 error = ENOMEM;
2974                 goto fail;
2975         }
2976
2977         /* Create the descriptor buffer dma maps */
2978         txbuf = txr->tx_buffers;
2979         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2980                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2981                 if (error != 0) {
2982                         device_printf(dev, "Unable to create TX DMA map\n");
2983                         goto fail;
2984                 }
2985         }
2986
2987         return 0;
2988 fail:
2989         /* Free it all; this handles a failure partway through */
2990         ixgbe_free_transmit_structures(adapter);
2991         return (error);
2992 }
2993
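/*
** The DMA tag above bounds every transmit mapping: at most
** IXGBE_TSO_SIZE bytes in total, split across at most
** adapter->num_segs segments of at most PAGE_SIZE each. A
** minimal sketch of the sanity check those limits imply
** (constants are illustrative stand-ins, not the driver's):
*/
#if 0
#include <assert.h>

#define TSO_SIZE	65535	/* stand-in for IXGBE_TSO_SIZE */
#define NUM_SEGS	40	/* stand-in for adapter->num_segs */
#define SEG_SIZE	4096	/* stand-in for PAGE_SIZE */

static void
check_tag_limits(void)
{
	/* If the segments cannot cover maxsize, mbuf loads may fail */
	assert((long)NUM_SEGS * SEG_SIZE >= TSO_SIZE);
}
#endif
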
2994 /*********************************************************************
2995  *
2996  *  Initialize a transmit ring.
2997  *
2998  **********************************************************************/
2999 static void
3000 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3001 {
3002         struct adapter *adapter = txr->adapter;
3003         struct ixgbe_tx_buf *txbuf;
3004         int i;
3005 #ifdef DEV_NETMAP
3006         struct netmap_adapter *na = NA(adapter->ifp);
3007         struct netmap_slot *slot;
3008 #endif /* DEV_NETMAP */
3009
3010         /* Clear the old ring contents */
3011         IXGBE_TX_LOCK(txr);
3012 #ifdef DEV_NETMAP
3013         /*
3014          * (under lock): if in netmap mode, do some consistency
3015          * checks and set slot to entry 0 of the netmap ring.
3016          */
3017         slot = netmap_reset(na, NR_TX, txr->me, 0);
3018 #endif /* DEV_NETMAP */
3019         bzero((void *)txr->tx_base,
3020               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3021         /* Reset indices */
3022         txr->next_avail_desc = 0;
3023         txr->next_to_clean = 0;
3024
3025         /* Free any existing tx buffers. */
3026         txbuf = txr->tx_buffers;
3027         for (i = 0; i < txr->num_desc; i++, txbuf++) {
3028                 if (txbuf->m_head != NULL) {
3029                         bus_dmamap_sync(txr->txtag, txbuf->map,
3030                             BUS_DMASYNC_POSTWRITE);
3031                         bus_dmamap_unload(txr->txtag, txbuf->map);
3032                         m_freem(txbuf->m_head);
3033                         txbuf->m_head = NULL;
3034                 }
3035 #ifdef DEV_NETMAP
3036                 /*
3037                  * In netmap mode, set the map for the packet buffer.
3038                  * NOTE: Some drivers (not this one) also need to set
3039                  * the physical buffer address in the NIC ring.
3040                  * Slots in the netmap ring (indexed by "si") are
3041                  * kring->nkr_hwofs positions "ahead" wrt the
3042                  * corresponding slot in the NIC ring. In some drivers
3043                  * (not here) nkr_hwofs can be negative. Function
3044                  * netmap_idx_n2k() handles wraparounds properly.
3045                  */
3046                 if (slot) {
3047                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3048                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3049                 }
3050 #endif /* DEV_NETMAP */
3051                 /* Clear the EOP descriptor pointer */
3052                 txbuf->eop = NULL;
3053         }
3054
3055 #ifdef IXGBE_FDIR
3056         /* Set the rate at which we sample packets */
3057         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3058                 txr->atr_sample = atr_sample_rate;
3059 #endif
3060
3061         /* Set number of descriptors available */
3062         txr->tx_avail = adapter->num_tx_desc;
3063
3064         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3065             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3066         IXGBE_TX_UNLOCK(txr);
3067 }
3068
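/*
** netmap_idx_n2k() used above translates a NIC ring index into
** the matching netmap slot by adding the ring offset
** (nkr_hwofs, possibly negative) and wrapping. A minimal
** standalone sketch of that translation (names illustrative):
*/
#if 0
static int
nic_to_netmap_idx(int idx, int hwofs, int num_slots)
{
	idx += hwofs;			/* offset between the two rings */
	if (idx < 0)			/* wrap a negative result */
		return (idx + num_slots);
	else if (idx >= num_slots)	/* or one past the end */
		return (idx - num_slots);
	return (idx);
}
#endif
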
3069 /*********************************************************************
3070  *
3071  *  Initialize all transmit rings.
3072  *
3073  **********************************************************************/
3074 static int
3075 ixgbe_setup_transmit_structures(struct adapter *adapter)
3076 {
3077         struct tx_ring *txr = adapter->tx_rings;
3078
3079         for (int i = 0; i < adapter->num_queues; i++, txr++)
3080                 ixgbe_setup_transmit_ring(txr);
3081
3082         return (0);
3083 }
3084
3085 /*********************************************************************
3086  *
3087  *  Enable transmit unit.
3088  *
3089  **********************************************************************/
3090 static void
3091 ixgbe_initialize_transmit_units(struct adapter *adapter)
3092 {
3093         struct tx_ring  *txr = adapter->tx_rings;
3094         struct ixgbe_hw *hw = &adapter->hw;
3095
3096         /* Setup the Base and Length of the Tx Descriptor Ring */
3097
3098         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3099                 u64     tdba = txr->txdma.dma_paddr;
3100                 u32     txctrl;
3101
3102                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3103                        (tdba & 0x00000000ffffffffULL));
3104                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3105                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3106                     adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3107
3108                 /* Setup the HW Tx Head and Tail descriptor pointers */
3109                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3110                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3111
3112                 /* Setup Transmit Descriptor Cmd Settings */
3113                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3114                 txr->queue_status = IXGBE_QUEUE_IDLE;
3115
3116                 /* Set the processing limit */
3117                 txr->process_limit = ixgbe_tx_process_limit;
3118
3119                 /* Disable Head Writeback */
3120                 switch (hw->mac.type) {
3121                 case ixgbe_mac_82598EB:
3122                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3123                         break;
3124                 case ixgbe_mac_82599EB:
3125                 case ixgbe_mac_X540:
3126                 default:
3127                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3128                         break;
3129                 }
3130                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3131                 switch (hw->mac.type) {
3132                 case ixgbe_mac_82598EB:
3133                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3134                         break;
3135                 case ixgbe_mac_82599EB:
3136                 case ixgbe_mac_X540:
3137                 default:
3138                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3139                         break;
3140                 }
3141
3142         }
3143
3144         if (hw->mac.type != ixgbe_mac_82598EB) {
3145                 u32 dmatxctl, rttdcs;
3146                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3147                 dmatxctl |= IXGBE_DMATXCTL_TE;
3148                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3149                 /* Disable arbiter to set MTQC */
3150                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3151                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3152                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3153                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3154                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3155                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3156         }
3157
3158         return;
3159 }
3160
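/*
** TDBAL/TDBAH above take the ring's 64-bit bus address as two
** 32-bit halves. A minimal standalone sketch of the split:
*/
#if 0
#include <stdint.h>

static void
split_dba(uint64_t dba, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)(dba & 0x00000000ffffffffULL);
	*hi = (uint32_t)(dba >> 32);
	/* ((uint64_t)*hi << 32) | *lo reproduces dba */
}
#endif
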
3161 /*********************************************************************
3162  *
3163  *  Free all transmit rings.
3164  *
3165  **********************************************************************/
3166 static void
3167 ixgbe_free_transmit_structures(struct adapter *adapter)
3168 {
3169         struct tx_ring *txr = adapter->tx_rings;
3170
3171         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3172                 IXGBE_TX_LOCK(txr);
3173                 ixgbe_free_transmit_buffers(txr);
3174                 ixgbe_dma_free(adapter, &txr->txdma);
3175                 IXGBE_TX_UNLOCK(txr);
3176                 IXGBE_TX_LOCK_DESTROY(txr);
3177         }
3178         free(adapter->tx_rings, M_DEVBUF);
3179 }
3180
3181 /*********************************************************************
3182  *
3183  *  Free transmit ring related data structures.
3184  *
3185  **********************************************************************/
3186 static void
3187 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3188 {
3189         struct adapter *adapter = txr->adapter;
3190         struct ixgbe_tx_buf *tx_buffer;
3191         int             i;
3192
3193         INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
3194
3195         if (txr->tx_buffers == NULL)
3196                 return;
3197
3198         tx_buffer = txr->tx_buffers;
3199         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3200                 if (tx_buffer->m_head != NULL) {
3201                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3202                             BUS_DMASYNC_POSTWRITE);
3203                         bus_dmamap_unload(txr->txtag,
3204                             tx_buffer->map);
3205                         m_freem(tx_buffer->m_head);
3206                         tx_buffer->m_head = NULL;
3207                         if (tx_buffer->map != NULL) {
3208                                 bus_dmamap_destroy(txr->txtag,
3209                                     tx_buffer->map);
3210                                 tx_buffer->map = NULL;
3211                         }
3212                 } else if (tx_buffer->map != NULL) {
3213                         bus_dmamap_unload(txr->txtag,
3214                             tx_buffer->map);
3215                         bus_dmamap_destroy(txr->txtag,
3216                             tx_buffer->map);
3217                         tx_buffer->map = NULL;
3218                 }
3219         }
3220 #ifndef IXGBE_LEGACY_TX
3221         if (txr->br != NULL)
3222                 buf_ring_free(txr->br, M_DEVBUF);
3223 #endif
3224         if (txr->tx_buffers != NULL) {
3225                 free(txr->tx_buffers, M_DEVBUF);
3226                 txr->tx_buffers = NULL;
3227         }
3228         if (txr->txtag != NULL) {
3229                 bus_dma_tag_destroy(txr->txtag);
3230                 txr->txtag = NULL;
3231         }
3232         return;
3233 }
3234
3235 /*********************************************************************
3236  *
3237  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3238  *
3239  **********************************************************************/
3240
3241 static int
3242 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3243     u32 *cmd_type_len, u32 *olinfo_status)
3244 {
3245         struct ixgbe_adv_tx_context_desc *TXD;
3246         struct ether_vlan_header *eh;
3247         struct ip *ip;
3248         struct ip6_hdr *ip6;
3249         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3250         int     ehdrlen, ip_hlen = 0;
3251         u16     etype;
3252         u8      ipproto = 0;
3253         int     offload = TRUE;
3254         int     ctxd = txr->next_avail_desc;
3255         u16     vtag = 0;
3256
3257         /* First check if TSO is to be used */
3258         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3259                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3260
3261         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3262                 offload = FALSE;
3263
3264         /* Indicate the whole packet as payload when not doing TSO */
3265         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3266
3267         /* Now ready a context descriptor */
3268         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3269
3270         /*
3271         ** In advanced descriptors the vlan tag must 
3272         ** be placed into the context descriptor. Hence
3273         ** we need to make one even if not doing offloads.
3274         */
3275         if (mp->m_flags & M_VLANTAG) {
3276                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3277                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3278         } else if (offload == FALSE) /* ... no offload to do */
3279                 return (0);
3280
3281         /*
3282          * Determine where frame payload starts.
3283          * Jump over vlan headers if already present,
3284          * helpful for QinQ too.
3285          */
3286         eh = mtod(mp, struct ether_vlan_header *);
3287         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3288                 etype = ntohs(eh->evl_proto);
3289                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3290         } else {
3291                 etype = ntohs(eh->evl_encap_proto);
3292                 ehdrlen = ETHER_HDR_LEN;
3293         }
3294
3295         /* Set the ether header length */
3296         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3297
3298         switch (etype) {
3299                 case ETHERTYPE_IP:
3300                         ip = (struct ip *)(mp->m_data + ehdrlen);
3301                         ip_hlen = ip->ip_hl << 2;
3302                         ipproto = ip->ip_p;
3303                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3304                         break;
3305                 case ETHERTYPE_IPV6:
3306                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3307                         ip_hlen = sizeof(struct ip6_hdr);
3308                         /* XXX-BZ this will go badly in case of ext hdrs. */
3309                         ipproto = ip6->ip6_nxt;
3310                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3311                         break;
3312                 default:
3313                         offload = FALSE;
3314                         break;
3315         }
3316
3317         vlan_macip_lens |= ip_hlen;
3318         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3319
3320         switch (ipproto) {
3321                 case IPPROTO_TCP:
3322                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3323                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3324                         break;
3325
3326                 case IPPROTO_UDP:
3327                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3328                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3329                         break;
3330
3331 #if __FreeBSD_version >= 800000
3332                 case IPPROTO_SCTP:
3333                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3334                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3335                         break;
3336 #endif
3337                 default:
3338                         offload = FALSE;
3339                         break;
3340         }
3341
3342         if (offload) /* For the TX descriptor setup */
3343                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3344
3345         /* Now copy bits into descriptor */
3346         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3347         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3348         TXD->seqnum_seed = htole32(0);
3349         TXD->mss_l4len_idx = htole32(0);
3350
3351         /* We've consumed the first desc, adjust counters */
3352         if (++ctxd == txr->num_desc)
3353                 ctxd = 0;
3354         txr->next_avail_desc = ctxd;
3355         --txr->tx_avail;
3356
3357         return (0);
3358 }
3359
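/*
** vlan_macip_lens packs three fields into one 32-bit word: the
** VLAN tag in the top 16 bits, the MAC header length above the
** 9-bit IP header length. A minimal sketch of the packing,
** using the shift values this driver uses:
*/
#if 0
#include <stdint.h>

#define VLAN_SHIFT	16	/* IXGBE_ADVTXD_VLAN_SHIFT */
#define MACLEN_SHIFT	9	/* IXGBE_ADVTXD_MACLEN_SHIFT */

static uint32_t
pack_vlan_macip_lens(uint16_t vtag, uint8_t maclen, uint16_t iplen)
{
	return (((uint32_t)vtag << VLAN_SHIFT) |
	    ((uint32_t)maclen << MACLEN_SHIFT) | iplen);
}
#endif
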
3360 /**********************************************************************
3361  *
3362  *  Setup work for hardware segmentation offload (TSO) on
3363  *  adapters using advanced tx descriptors
3364  *
3365  **********************************************************************/
3366 static int
3367 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3368     u32 *cmd_type_len, u32 *olinfo_status)
3369 {
3370         struct ixgbe_adv_tx_context_desc *TXD;
3371         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3372         u32 mss_l4len_idx = 0, paylen;
3373         u16 vtag = 0, eh_type;
3374         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3375         struct ether_vlan_header *eh;
3376 #ifdef INET6
3377         struct ip6_hdr *ip6;
3378 #endif
3379 #ifdef INET
3380         struct ip *ip;
3381 #endif
3382         struct tcphdr *th;
3383
3384
3385         /*
3386          * Determine where frame payload starts.
3387          * Jump over vlan headers if already present
3388          */
3389         eh = mtod(mp, struct ether_vlan_header *);
3390         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3391                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3392                 eh_type = eh->evl_proto;
3393         } else {
3394                 ehdrlen = ETHER_HDR_LEN;
3395                 eh_type = eh->evl_encap_proto;
3396         }
3397
3398         switch (ntohs(eh_type)) {
3399 #ifdef INET6
3400         case ETHERTYPE_IPV6:
3401                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3402                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3403                 if (ip6->ip6_nxt != IPPROTO_TCP)
3404                         return (ENXIO);
3405                 ip_hlen = sizeof(struct ip6_hdr);
3407                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3408                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3409                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3410                 break;
3411 #endif
3412 #ifdef INET
3413         case ETHERTYPE_IP:
3414                 ip = (struct ip *)(mp->m_data + ehdrlen);
3415                 if (ip->ip_p != IPPROTO_TCP)
3416                         return (ENXIO);
3417                 ip->ip_sum = 0;
3418                 ip_hlen = ip->ip_hl << 2;
3419                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3420                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3421                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3422                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3423                 /* Tell transmit desc to also do IPv4 checksum. */
3424                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3425                 break;
3426 #endif
3427         default:
3428                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3429                     __func__, ntohs(eh_type));
3430                 break;
3431         }
3432
3433         ctxd = txr->next_avail_desc;
3434         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3435
3436         tcp_hlen = th->th_off << 2;
3437
3438         /* This is used in the transmit desc in encap */
3439         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3440
3441         /* VLAN MACLEN IPLEN */
3442         if (mp->m_flags & M_VLANTAG) {
3443                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3444                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3445         }
3446
3447         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3448         vlan_macip_lens |= ip_hlen;
3449         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3450
3451         /* ADV DTYPE TUCMD */
3452         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3453         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3454         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3455
3456         /* MSS L4LEN IDX */
3457         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3458         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3459         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3460
3461         TXD->seqnum_seed = htole32(0);
3462
3463         if (++ctxd == txr->num_desc)
3464                 ctxd = 0;
3465
3466         txr->tx_avail--;
3467         txr->next_avail_desc = ctxd;
3468         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3469         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3470         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3471         ++txr->tso_tx;
3472         return (0);
3473 }
3474
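/*
** paylen above is the TCP payload only: the frame length minus
** the Ethernet, IP and TCP headers. A worked example, assuming
** a 20494-byte TSO packet with no VLAN tag and no IP or TCP
** options: 20494 - 14 (ether) - 20 (ip) - 20 (tcp) = 20440
** bytes of payload, which at an MSS of 1460 the hardware will
** slice into 14 full segments.
*/
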
3475 #ifdef IXGBE_FDIR
3476 /*
3477 ** This routine parses packet headers so that Flow
3478 ** Director can make a hashed filter table entry
3479 ** allowing traffic flows to be identified and kept
3480 ** on the same CPU.  Doing this for every packet would
3481 ** be a performance hit, so we only sample one of every
3482 ** IXGBE_FDIR_RATE packets.
3483 */
3484 static void
3485 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3486 {
3487         struct adapter                  *adapter = txr->adapter;
3488         struct ix_queue                 *que;
3489         struct ip                       *ip;
3490         struct tcphdr                   *th;
3491         struct udphdr                   *uh;
3492         struct ether_vlan_header        *eh;
3493         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3494         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3495         int                             ehdrlen, ip_hlen;
3496         u16                             etype;
3497
3498         eh = mtod(mp, struct ether_vlan_header *);
3499         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3500                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3501                 etype = eh->evl_proto;
3502         } else {
3503                 ehdrlen = ETHER_HDR_LEN;
3504                 etype = eh->evl_encap_proto;
3505         }
3506
3507         /* Only handling IPv4 */
3508         if (etype != htons(ETHERTYPE_IP))
3509                 return;
3510
3511         ip = (struct ip *)(mp->m_data + ehdrlen);
3512         ip_hlen = ip->ip_hl << 2;
3513
3514         /* check if we're UDP or TCP */
3515         switch (ip->ip_p) {
3516         case IPPROTO_TCP:
3517                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3518                 /* src and dst are inverted */
3519                 common.port.dst ^= th->th_sport;
3520                 common.port.src ^= th->th_dport;
3521                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3522                 break;
3523         case IPPROTO_UDP:
3524                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3525                 /* src and dst are inverted */
3526                 common.port.dst ^= uh->uh_sport;
3527                 common.port.src ^= uh->uh_dport;
3528                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3529                 break;
3530         default:
3531                 return;
3532         }
3533
3534         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3535         if (mp->m_pkthdr.ether_vtag)
3536                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3537         else
3538                 common.flex_bytes ^= etype;
3539         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3540
3541         que = &adapter->queues[txr->me];
3542         /*
3543         ** This assumes the Rx queue and Tx
3544         ** queue are bound to the same CPU
3545         */
3546         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3547             input, common, que->msix);
3548 }
3549 #endif /* IXGBE_FDIR */
3550
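/*
** The port swap above keys the filter on the receive direction:
** the reply to a transmitted packet arrives with source and
** destination exchanged, and that is the flow we want steered
** back to this queue's CPU. A minimal sketch of building such
** a key (types illustrative, not the driver's):
*/
#if 0
#include <stdint.h>

struct flow_key { uint16_t src, dst; };

static struct flow_key
rx_key_from_tx(uint16_t tx_sport, uint16_t tx_dport)
{
	/* The reply's source is our destination, and vice versa */
	struct flow_key k = { .src = tx_dport, .dst = tx_sport };
	return (k);
}
#endif
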
3551 /**********************************************************************
3552  *
3553  *  Examine each tx_buffer in the used queue. If the hardware is done
3554  *  processing the packet then free associated resources. The
3555  *  tx_buffer is put back on the free queue.
3556  *
3557  **********************************************************************/
3558 static void
3559 ixgbe_txeof(struct tx_ring *txr)
3560 {
3561         struct adapter          *adapter = txr->adapter;
3562         struct ifnet            *ifp = adapter->ifp;
3563         u32                     work, processed = 0;
3564         u16                     limit = txr->process_limit;
3565         struct ixgbe_tx_buf     *buf;
3566         union ixgbe_adv_tx_desc *txd;
3567
3568         mtx_assert(&txr->tx_mtx, MA_OWNED);
3569
3570 #ifdef DEV_NETMAP
3571         if (ifp->if_capenable & IFCAP_NETMAP) {
3572                 struct netmap_adapter *na = NA(ifp);
3573                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3574                 txd = txr->tx_base;
3575                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3576                     BUS_DMASYNC_POSTREAD);
3577                 /*
3578                  * In netmap mode, all the work is done in the context
3579                  * of the client thread. Interrupt handlers only wake up
3580                  * clients, which may be sleeping on individual rings
3581                  * or on a global resource for all rings.
3582                  * To implement tx interrupt mitigation, we wake up the client
3583                  * thread roughly every half ring, even if the NIC interrupts
3584                  * more frequently. This is implemented as follows:
3585                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3586                  *   the slot that should wake up the thread (nkr_num_slots
3587                  *   means the user thread should not be woken up);
3588                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3589                  *   or the slot has the DD bit set.
3590                  *
3591                  * When the driver has separate locks, we need to
3592                  * release and re-acquire txlock to avoid deadlocks.
3593                  * XXX see if we can find a better way.
3594                  */
3595                 if (!netmap_mitigate ||
3596                     (kring->nr_kflags < kring->nkr_num_slots &&
3597                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3598                         netmap_tx_irq(ifp, txr->me |
3599                             (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
3600                 }
3601                 return;
3602         }
3603 #endif /* DEV_NETMAP */
3604
3605         if (txr->tx_avail == txr->num_desc) {
3606                 txr->queue_status = IXGBE_QUEUE_IDLE;
3607                 return;
3608         }
3609
3610         /* Get work starting point */
3611         work = txr->next_to_clean;
3612         buf = &txr->tx_buffers[work];
3613         txd = &txr->tx_base[work];
3614         work -= txr->num_desc; /* The distance to ring end */
3615         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3616             BUS_DMASYNC_POSTREAD);
3617
3618         do {
3619                 union ixgbe_adv_tx_desc *eop = buf->eop;
3620                 if (eop == NULL) /* No work */
3621                         break;
3622
3623                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3624                         break;  /* I/O not complete */
3625
3626                 if (buf->m_head) {
3627                         txr->bytes +=
3628                             buf->m_head->m_pkthdr.len;
3629                         bus_dmamap_sync(txr->txtag,
3630                             buf->map,
3631                             BUS_DMASYNC_POSTWRITE);
3632                         bus_dmamap_unload(txr->txtag,
3633                             buf->map);
3634                         m_freem(buf->m_head);
3635                         buf->m_head = NULL;
3636                         buf->map = NULL;
3637                 }
3638                 buf->eop = NULL;
3639                 ++txr->tx_avail;
3640
3641                 /* A multi-segment packet: clean the whole range */
3642                 while (txd != eop) {
3643                         ++txd;
3644                         ++buf;
3645                         ++work;
3646                         /* wrap the ring? */
3647                         if (__predict_false(!work)) {
3648                                 work -= txr->num_desc;
3649                                 buf = txr->tx_buffers;
3650                                 txd = txr->tx_base;
3651                         }
3652                         if (buf->m_head) {
3653                                 txr->bytes +=
3654                                     buf->m_head->m_pkthdr.len;
3655                                 bus_dmamap_sync(txr->txtag,
3656                                     buf->map,
3657                                     BUS_DMASYNC_POSTWRITE);
3658                                 bus_dmamap_unload(txr->txtag,
3659                                     buf->map);
3660                                 m_freem(buf->m_head);
3661                                 buf->m_head = NULL;
3662                                 buf->map = NULL;
3663                         }
3664                         ++txr->tx_avail;
3665                         buf->eop = NULL;
3666
3667                 }
3668                 ++txr->packets;
3669                 ++processed;
3670                 ++ifp->if_opackets;
3671                 txr->watchdog_time = ticks;
3672
3673                 /* Try the next packet */
3674                 ++txd;
3675                 ++buf;
3676                 ++work;
3677                 /* reset with a wrap */
3678                 if (__predict_false(!work)) {
3679                         work -= txr->num_desc;
3680                         buf = txr->tx_buffers;
3681                         txd = txr->tx_base;
3682                 }
3683                 prefetch(txd);
3684         } while (__predict_true(--limit));
3685
3686         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3687             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3688
3689         work += txr->num_desc;
3690         txr->next_to_clean = work;
3691
3692         /*
3693         ** Watchdog calculation: we know there is
3694         ** work outstanding or the first return
3695         ** would have been taken, so nothing processed
3696         ** for too long indicates a hang.
3697         */
3698         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3699                 txr->queue_status = IXGBE_QUEUE_HUNG;
3700
3701         if (txr->tx_avail == txr->num_desc)
3702                 txr->queue_status = IXGBE_QUEUE_IDLE;
3703
3704         return;
3705 }
3706
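/*
** The cleanup loop above biases 'work' by -num_desc so that a
** ring wrap shows up as the counter reaching zero, avoiding a
** bounds test or modulo per descriptor. A minimal standalone
** sketch of the idiom:
*/
#if 0
static int
walk_ring(int start, int num_desc, int steps)
{
	int work = start - num_desc;	/* negative distance to ring end */

	while (steps--) {
		++work;
		if (work == 0)		/* hit the end of the ring: wrap */
			work -= num_desc;
	}
	return (work + num_desc);	/* back to a normal ring index */
}
#endif
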
3707 /*********************************************************************
3708  *
3709  *  Refresh mbuf buffers for RX descriptor rings
3710  *   - now keeps its own state, so discards due to resource
3711  *     exhaustion are unnecessary: if an mbuf cannot be obtained
3712  *     it just returns, keeping its placeholder, and can simply
3713  *     be called again later to retry.
3714  *
3715  **********************************************************************/
3716 static void
3717 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3718 {
3719         struct adapter          *adapter = rxr->adapter;
3720         bus_dma_segment_t       seg[1];
3721         struct ixgbe_rx_buf     *rxbuf;
3722         struct mbuf             *mp;
3723         int                     i, j, nsegs, error;
3724         bool                    refreshed = FALSE;
3725
3726         i = j = rxr->next_to_refresh;
3727         /* j runs one slot beyond i to control the loop */
3728         if (++j == rxr->num_desc)
3729                 j = 0;
3730
3731         while (j != limit) {
3732                 rxbuf = &rxr->rx_buffers[i];
3733                 if (rxbuf->buf == NULL) {
3734                         mp = m_getjcl(M_NOWAIT, MT_DATA,
3735                             M_PKTHDR, rxr->mbuf_sz);
3736                         if (mp == NULL)
3737                                 goto update;
3738                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3739                                 m_adj(mp, ETHER_ALIGN);
3740                 } else
3741                         mp = rxbuf->buf;
3742
3743                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
3744
3745                 /* If we're dealing with an mbuf that was copied rather
3746                  * than replaced, there's no need to go through busdma.
3747                  */
3748                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
3749                         /* Get the memory mapping */
3750                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3751                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
3752                         if (error != 0) {
3753                                 printf("Refresh mbufs: payload dmamap load"
3754                                     " failure - %d\n", error);
3755                                 m_free(mp);
3756                                 rxbuf->buf = NULL;
3757                                 goto update;
3758                         }
3759                         rxbuf->buf = mp;
3760                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3761                             BUS_DMASYNC_PREREAD);
3762                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
3763                             htole64(seg[0].ds_addr);
3764                 } else {
3765                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
3766                         rxbuf->flags &= ~IXGBE_RX_COPY;
3767                 }
3768
3769                 refreshed = TRUE;
3770                 /* Next is precalculated */
3771                 i = j;
3772                 rxr->next_to_refresh = i;
3773                 if (++j == rxr->num_desc)
3774                         j = 0;
3775         }
3776 update:
3777         if (refreshed) /* Update hardware tail index */
3778                 IXGBE_WRITE_REG(&adapter->hw,
3779                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3780         return;
3781 }
3782
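/*
** The IXGBE_RX_COPY branch above is the cheap half of the small
** packet optimization: when rxeof copied a small payload into a
** fresh mbuf instead of handing up the DMA buffer, the original
** buffer and its mapping remain valid, so refresh only restores
** the cached descriptor address. A minimal sketch of the two
** paths (types illustrative, not the driver's):
*/
#if 0
#include <stdint.h>

struct rbuf { uint64_t addr; int was_copied; };

static uint64_t
refresh_desc_addr(struct rbuf *rb, uint64_t fresh_mapping)
{
	if (rb->was_copied) {
		rb->was_copied = 0;
		return (rb->addr);	/* reuse mapping, skip busdma */
	}
	rb->addr = fresh_mapping;	/* a new mbuf was mapped */
	return (rb->addr);
}
#endif
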
3783 /*********************************************************************
3784  *
3785  *  Allocate memory for rx_buffer structures. Since we use one
3786  *  rx_buffer per received packet, the maximum number of rx_buffer's
3787  *  that we'll need is equal to the number of receive descriptors
3788  *  that we've allocated.
3789  *
3790  **********************************************************************/
3791 static int
3792 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3793 {
3794         struct  adapter         *adapter = rxr->adapter;
3795         device_t                dev = adapter->dev;
3796         struct ixgbe_rx_buf     *rxbuf;
3797         int                     i, bsize, error;
3798
3799         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
3800         if (!(rxr->rx_buffers =
3801             (struct ixgbe_rx_buf *) malloc(bsize,
3802             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3803                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3804                 error = ENOMEM;
3805                 goto fail;
3806         }
3807
3808         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
3809                                    1, 0,        /* alignment, bounds */
3810                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3811                                    BUS_SPACE_MAXADDR,   /* highaddr */
3812                                    NULL, NULL,          /* filter, filterarg */
3813                                    MJUM16BYTES,         /* maxsize */
3814                                    1,                   /* nsegments */
3815                                    MJUM16BYTES,         /* maxsegsize */
3816                                    0,                   /* flags */
3817                                    NULL,                /* lockfunc */
3818                                    NULL,                /* lockfuncarg */
3819                                    &rxr->ptag))) {
3820                 device_printf(dev, "Unable to create RX DMA tag\n");
3821                 goto fail;
3822         }
3823
3824         for (i = 0; i < rxr->num_desc; i++) {
3825                 rxbuf = &rxr->rx_buffers[i];
3826                 error = bus_dmamap_create(rxr->ptag,
3827                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3828                 if (error) {
3829                         device_printf(dev, "Unable to create RX dma map\n");
3830                         goto fail;
3831                 }
3832         }
3833
3834         return (0);
3835
3836 fail:
3837         /* Frees all, but can handle partial completion */
3838         ixgbe_free_receive_structures(adapter);
3839         return (error);
3840 }
3841
3842 /*
3843 ** Used to detect a descriptor that has
3844 ** been merged by Hardware RSC.
3845 */
3846 static inline u32
3847 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3848 {
3849         return (le32toh(rx->wb.lower.lo_dword.data) &
3850             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3851 }
3852
3853 /*********************************************************************
3854  *
3855  *  Initialize the Hardware RSC (LRO) feature on 82599
3856  *  for an RX ring; it is toggled by the LRO capability
3857  *  even though it is transparent to the stack.
3858  *
3859  *  NOTE: since this HW feature only works with IPv4 and
3860  *        our testing has shown soft LRO to be as effective,
3861  *        I have decided to disable this by default.
3862  *
3863  **********************************************************************/
3864 static void
3865 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3866 {
3867         struct  adapter         *adapter = rxr->adapter;
3868         struct  ixgbe_hw        *hw = &adapter->hw;
3869         u32                     rscctrl, rdrxctl;
3870
3871         /* If turning LRO/RSC off we need to disable it */
3872         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
3873                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3874                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3875                 return;
3876         }
3877
3878         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3879         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3880 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3881         if (!(adapter->ifp->if_capenable & IFCAP_NETMAP) || ix_crcstrip)
3882 #endif /* DEV_NETMAP */
3883         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3884         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3885         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3886
3887         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3888         rscctrl |= IXGBE_RSCCTL_RSCEN;
3889         /*
3890         ** Limit the total number of descriptors that
3891         ** can be combined, so it does not exceed 64K
3892         */
3893         if (rxr->mbuf_sz == MCLBYTES)
3894                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3895         else if (rxr->mbuf_sz == MJUMPAGESIZE)
3896                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3897         else if (rxr->mbuf_sz == MJUM9BYTES)
3898                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3899         else  /* Using 16K cluster */
3900                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3901
3902         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3903
3904         /* Enable TCP header recognition */
3905         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3906             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3907             IXGBE_PSRTYPE_TCPHDR));
3908
3909         /* Disable RSC for ACK packets */
3910         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3911             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3912
3913         rxr->hw_rsc = TRUE;
3914 }
3915
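/*
** The MAXDESC choice above is the 64KB cap worked out for each
** cluster size: 16 x 2KB (MCLBYTES) = 32KB, 8 x 4KB
** (MJUMPAGESIZE on most platforms) = 32KB, 4 x 9KB (MJUM9BYTES)
** = 36KB, and a single 16KB cluster, so no merged RSC frame can
** exceed the 64KB limit.
*/
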
3916
3917 static void
3918 ixgbe_free_receive_ring(struct rx_ring *rxr)
3919 {
3920         struct ixgbe_rx_buf       *rxbuf;
3921         int i;
3922
3923         for (i = 0; i < rxr->num_desc; i++) {
3924                 rxbuf = &rxr->rx_buffers[i];
3925                 if (rxbuf->buf != NULL) {
3926                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3927                             BUS_DMASYNC_POSTREAD);
3928                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3929                         rxbuf->buf->m_flags |= M_PKTHDR;
3930                         m_freem(rxbuf->buf);
3931                         rxbuf->buf = NULL;
3932                 }
3933         }
3934 }
3935
3936
3937 /*********************************************************************
3938  *
3939  *  Initialize a receive ring and its buffers.
3940  *
3941  **********************************************************************/
3942 static int
3943 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3944 {
3945         struct  adapter         *adapter;
3946         struct ifnet            *ifp;
3947         device_t                dev;
3948         struct ixgbe_rx_buf     *rxbuf;
3949         bus_dma_segment_t       seg[1];
3950         struct lro_ctrl         *lro = &rxr->lro;
3951         int                     rsize, nsegs, error = 0;
3952 #ifdef DEV_NETMAP
3953         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3954         struct netmap_slot *slot;
3955 #endif /* DEV_NETMAP */
3956
3957         adapter = rxr->adapter;
3958         ifp = adapter->ifp;
3959         dev = adapter->dev;
3960
3961         /* Clear the ring contents */
3962         IXGBE_RX_LOCK(rxr);
3963 #ifdef DEV_NETMAP
3964         /* same as in ixgbe_setup_transmit_ring() */
3965         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3966 #endif /* DEV_NETMAP */
3967         rsize = roundup2(adapter->num_rx_desc *
3968             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3969         bzero((void *)rxr->rx_base, rsize);
3970         /* Cache the size */
3971         rxr->mbuf_sz = adapter->rx_mbuf_sz;
3972
3973         /* Free current RX buffer structs and their mbufs */
3974         ixgbe_free_receive_ring(rxr);
3975
3976         /* Now replenish the mbufs */
3977         for (int j = 0; j != rxr->num_desc; ++j) {
3978                 struct mbuf     *mp;
3979
3980                 rxbuf = &rxr->rx_buffers[j];
3981 #ifdef DEV_NETMAP
3982                 /*
3983                  * In netmap mode, fill the map and set the buffer
3984                  * address in the NIC ring, considering the offset
3985                  * between the netmap and NIC rings (see comment in
3986                  * ixgbe_setup_transmit_ring() ). No need to allocate
3987                  * an mbuf, so end the block with a continue;
3988                  */
3989                 if (slot) {
3990                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
3991                         uint64_t paddr;
3992                         void *addr;
3993
3994                         addr = PNMB(slot + sj, &paddr);
3995                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
3996                         /* Update descriptor and the cached value */
3997                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
3998                         rxbuf->addr = htole64(paddr);
3999                         continue;
4000                 }
4001 #endif /* DEV_NETMAP */
4002                 rxbuf->flags = 0; 
4003                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
4004                     M_PKTHDR, adapter->rx_mbuf_sz);
4005                 if (rxbuf->buf == NULL) {
4006                         error = ENOBUFS;
4007                         goto fail;
4008                 }
4009                 mp = rxbuf->buf;
4010                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4011                 /* Get the memory mapping */
4012                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4013                     rxbuf->pmap, mp, seg,
4014                     &nsegs, BUS_DMA_NOWAIT);
4015                 if (error != 0)
4016                         goto fail;
4017                 bus_dmamap_sync(rxr->ptag,
4018                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4019                 /* Update the descriptor and the cached value */
4020                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
4021                 rxbuf->addr = htole64(seg[0].ds_addr);
4022         }
4023
4024
4025         /* Setup our descriptor indices */
4026         rxr->next_to_check = 0;
4027         rxr->next_to_refresh = 0;
4028         rxr->lro_enabled = FALSE;
4029         rxr->rx_copies = 0;
4030         rxr->rx_bytes = 0;
4031         rxr->discard = FALSE;
4032         rxr->vtag_strip = FALSE;
4033
4034         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4035             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4036
4037         /*
4038         ** Now set up the LRO interface:
4039         */
4040         if (ixgbe_rsc_enable)
4041                 ixgbe_setup_hw_rsc(rxr);
4042         else if (ifp->if_capenable & IFCAP_LRO) {
4043                 int err = tcp_lro_init(lro);
4044                 if (err) {
4045                         device_printf(dev, "LRO Initialization failed!\n");
4046                         goto fail;
4047                 }
4048                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4049                 rxr->lro_enabled = TRUE;
4050                 lro->ifp = adapter->ifp;
4051         }
4052
4053         IXGBE_RX_UNLOCK(rxr);
4054         return (0);
4055
4056 fail:
4057         ixgbe_free_receive_ring(rxr);
4058         IXGBE_RX_UNLOCK(rxr);
4059         return (error);
4060 }
4061
4062 /*********************************************************************
4063  *
4064  *  Initialize all receive rings.
4065  *
4066  **********************************************************************/
4067 static int
4068 ixgbe_setup_receive_structures(struct adapter *adapter)
4069 {
4070         struct rx_ring *rxr = adapter->rx_rings;
4071         int j;
4072
4073         for (j = 0; j < adapter->num_queues; j++, rxr++)
4074                 if (ixgbe_setup_receive_ring(rxr))
4075                         goto fail;
4076
4077         return (0);
4078 fail:
4079         /*
4080          * Free the RX buffers allocated so far. We only handle
4081          * the rings that completed; the failing case will have
4082          * cleaned up after itself. 'j' failed, so it's the terminus.
4083          */
4084         for (int i = 0; i < j; ++i) {
4085                 rxr = &adapter->rx_rings[i];
4086                 ixgbe_free_receive_ring(rxr);
4087         }
4088
4089         return (ENOBUFS);
4090 }
4091
4092 /*********************************************************************
4093  *
4094  *  Setup receive registers and features.
4095  *
4096  **********************************************************************/
4097 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4098
4099 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4100         
4101 static void
4102 ixgbe_initialize_receive_units(struct adapter *adapter)
4103 {
4104         struct  rx_ring *rxr = adapter->rx_rings;
4105         struct ixgbe_hw *hw = &adapter->hw;
4106         struct ifnet   *ifp = adapter->ifp;
4107         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4108         u32             reta, mrqc = 0, hlreg, random[10];
4109
4110
4111         /*
4112          * Make sure receives are disabled while
4113          * setting up the descriptor ring
4114          */
4115         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4116         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4117             rxctrl & ~IXGBE_RXCTRL_RXEN);
4118
4119         /* Enable broadcasts */
4120         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4121         fctrl |= IXGBE_FCTRL_BAM;
4122         fctrl |= IXGBE_FCTRL_DPF;
4123         fctrl |= IXGBE_FCTRL_PMCF;
4124         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4125
4126         /* Set for Jumbo Frames? */
4127         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4128         if (ifp->if_mtu > ETHERMTU)
4129                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4130         else
4131                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4132 #ifdef DEV_NETMAP
4133         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4134         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4135                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4136         else
4137                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4138 #endif /* DEV_NETMAP */
4139         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4140
4141         bufsz = (adapter->rx_mbuf_sz +
4142             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4143
4144         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4145                 u64 rdba = rxr->rxdma.dma_paddr;
4146
4147                 /* Setup the Base and Length of the Rx Descriptor Ring */
4148                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4149                                (rdba & 0x00000000ffffffffULL));
4150                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4151                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4152                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4153
4154                 /* Set up the SRRCTL register */
4155                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4156                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4157                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4158                 srrctl |= bufsz;
4159                 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4160                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4161
4162                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4163                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4164                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4165
4166                 /* Set the processing limit */
4167                 rxr->process_limit = ixgbe_rx_process_limit;
4168         }
4169
4170         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4171                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4172                               IXGBE_PSRTYPE_UDPHDR |
4173                               IXGBE_PSRTYPE_IPV4HDR |
4174                               IXGBE_PSRTYPE_IPV6HDR;
4175                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4176         }
4177
4178         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4179
4180         /* Setup RSS */
4181         if (adapter->num_queues > 1) {
4182                 int i, j;
4183                 reta = 0;
4184
4185                 /* set up random bits */
4186                 arc4rand(&random, sizeof(random), 0);
4187
4188                 /* Set up the redirection table */
4189                 for (i = 0, j = 0; i < 128; i++, j++) {
4190                         if (j == adapter->num_queues) j = 0;
4191                         reta = (reta << 8) | (j * 0x11);
4192                         if ((i & 3) == 3)
4193                                 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4194                 }
4195
4196                 /* Now fill our hash function seeds */
4197                 for (int i = 0; i < 10; i++)
4198                         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4199
4200                 /* Perform hash on these packet types */
4201                 mrqc = IXGBE_MRQC_RSSEN
4202                      | IXGBE_MRQC_RSS_FIELD_IPV4
4203                      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4204                      | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4205                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4206                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4207                      | IXGBE_MRQC_RSS_FIELD_IPV6
4208                      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4209                      | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4210                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4211                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4212
4213                 /* RSS and RX IPP Checksum are mutually exclusive */
4214                 rxcsum |= IXGBE_RXCSUM_PCSD;
4215         }
4216
4217         if (ifp->if_capenable & IFCAP_RXCSUM)
4218                 rxcsum |= IXGBE_RXCSUM_PCSD;
4219
4220         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4221                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4222
4223         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4224
4225         return;
4226 }
4227
4228 /*********************************************************************
4229  *
4230  *  Free all receive rings.
4231  *
4232  **********************************************************************/
4233 static void
4234 ixgbe_free_receive_structures(struct adapter *adapter)
4235 {
4236         struct rx_ring *rxr = adapter->rx_rings;
4237
4238         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4239
4240         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4241                 struct lro_ctrl         *lro = &rxr->lro;
4242                 ixgbe_free_receive_buffers(rxr);
4243                 /* Free LRO memory */
4244                 tcp_lro_free(lro);
4245                 /* Free the ring memory as well */
4246                 ixgbe_dma_free(adapter, &rxr->rxdma);
4247         }
4248
4249         free(adapter->rx_rings, M_DEVBUF);
4250 }
4251
4252
4253 /*********************************************************************
4254  *
4255  *  Free receive ring data structures
4256  *
4257  **********************************************************************/
4258 static void
4259 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4260 {
4261         struct adapter          *adapter = rxr->adapter;
4262         struct ixgbe_rx_buf     *rxbuf;
4263
4264         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4265
4266         /* Cleanup any existing buffers */
4267         if (rxr->rx_buffers != NULL) {
4268                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4269                         rxbuf = &rxr->rx_buffers[i];
4270                         if (rxbuf->buf != NULL) {
4271                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4272                                     BUS_DMASYNC_POSTREAD);
4273                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4274                                 rxbuf->buf->m_flags |= M_PKTHDR;
4275                                 m_freem(rxbuf->buf);
4276                         }
4277                         rxbuf->buf = NULL;
4278                         if (rxbuf->pmap != NULL) {
4279                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4280                                 rxbuf->pmap = NULL;
4281                         }
4282                 }
4283                 if (rxr->rx_buffers != NULL) {
4284                         free(rxr->rx_buffers, M_DEVBUF);
4285                         rxr->rx_buffers = NULL;
4286                 }
4287         }
4288
4289         if (rxr->ptag != NULL) {
4290                 bus_dma_tag_destroy(rxr->ptag);
4291                 rxr->ptag = NULL;
4292         }
4293
4294         return;
4295 }
4296
4297 static __inline void
4298 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4299 {
4300                  
4301         /*
4302          * At the moment LRO is only for IP/TCP packets whose TCP checksum
4303          * has been validated by hardware, and which carry no VLAN tag in
4304          * the ethernet header.  In the IPv6 case we do not yet support ext. hdrs.
4305          */
4306         if (rxr->lro_enabled &&
4307             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4308             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4309             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4310             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4311             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4312             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4313             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4314             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4315                 /*
4316                  * Send to the stack if:
4317                  *  - LRO not enabled, or
4318                  *  - no LRO resources, or
4319                  *  - lro enqueue fails
4320                  */
4321                 if (rxr->lro.lro_cnt != 0)
4322                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4323                                 return;
4324         }
4325         IXGBE_RX_UNLOCK(rxr);
4326         (*ifp->if_input)(ifp, m);
4327         IXGBE_RX_LOCK(rxr);
4328 }
4329
4330 static __inline void
4331 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4332 {
4333         struct ixgbe_rx_buf     *rbuf;
4334
4335         rbuf = &rxr->rx_buffers[i];
4336
4337         if (rbuf->fmp != NULL) {/* Partial chain ? */
4338                 rbuf->fmp->m_flags |= M_PKTHDR;
4339                 m_freem(rbuf->fmp);
4340                 rbuf->fmp = NULL;
4341         }
4342
4343         /*
4344         ** With advanced descriptors the writeback
4345         ** clobbers the buffer addrs, so it's easier
4346         ** to just free the existing mbufs and take
4347         ** the normal refresh path to get new buffers
4348         ** and mapping.
4349         */
4350         if (rbuf->buf) {
4351                 m_free(rbuf->buf);
4352                 rbuf->buf = NULL;
4353         }
4354
4355         rbuf->flags = 0;
4356  
4357         return;
4358 }
4359
4360
4361 /*********************************************************************
4362  *
4363  *  This routine executes in interrupt context. It replenishes
4364  *  the mbufs in the descriptor ring and passes data which has
4365  *  been dma'ed into host memory up to the upper layer.
4366  *
4367  *  We process at most 'process_limit' descriptors per call,
4368  *  refreshing mbufs every eight descriptors as we go.
4369  *
4370  *  Return TRUE for more work, FALSE for all clean.
4371  *********************************************************************/
4372 static bool
4373 ixgbe_rxeof(struct ix_queue *que)
4374 {
4375         struct adapter          *adapter = que->adapter;
4376         struct rx_ring          *rxr = que->rxr;
4377         struct ifnet            *ifp = adapter->ifp;
4378         struct lro_ctrl         *lro = &rxr->lro;
4379         struct lro_entry        *queued;
4380         int                     i, nextp, processed = 0;
4381         u32                     staterr = 0;
4382         u16                     count = rxr->process_limit;
4383         union ixgbe_adv_rx_desc *cur;
4384         struct ixgbe_rx_buf     *rbuf, *nbuf;
4385
4386         IXGBE_RX_LOCK(rxr);
4387
4388 #ifdef DEV_NETMAP
4389         /* As in the txeof routine: wake up netmap clients on an interrupt. */
4390         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4391                 return (FALSE);
4392 #endif /* DEV_NETMAP */
4393
4394         for (i = rxr->next_to_check; count != 0;) {
4395                 struct mbuf     *sendmp, *mp;
4396                 u32             rsc, ptype;
4397                 u16             len;
4398                 u16             vtag = 0;
4399                 bool            eop;
4400  
4401                 /* Sync the ring. */
4402                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4403                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4404
4405                 cur = &rxr->rx_base[i];
4406                 staterr = le32toh(cur->wb.upper.status_error);
4407
4408                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4409                         break;
4410                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4411                         break;
4412
4413                 count--;
4414                 sendmp = NULL;
4415                 nbuf = NULL;
4416                 rsc = 0;
4417                 cur->wb.upper.status_error = 0;
4418                 rbuf = &rxr->rx_buffers[i];
4419                 mp = rbuf->buf;
4420
4421                 len = le16toh(cur->wb.upper.length);
4422                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4423                     IXGBE_RXDADV_PKTTYPE_MASK;
4424                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4425
4426                 /* Make sure bad packets are discarded */
4427                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4428                     (rxr->discard)) {
4429                         rxr->rx_discarded++;
4430                         if (eop)
4431                                 rxr->discard = FALSE;
4432                         else
4433                                 rxr->discard = TRUE;
4434                         ixgbe_rx_discard(rxr, i);
4435                         goto next_desc;
4436                 }
4437
4438                 /*
4439                 ** On the 82599, which supports a hardware
4440                 ** LRO (called HW RSC), packets need
4441                 ** not be fragmented across sequential
4442                 ** descriptors; rather, the next descriptor
4443                 ** is indicated in bits of this descriptor.
4444                 ** This also means that we might process
4445                 ** more than one packet at a time, something
4446                 ** that had never been true before; it
4447                 ** required eliminating global chain pointers
4448                 ** in favor of what we are doing here.  -jfv
4449                 */
4450                 if (!eop) {
4451                         /*
4452                         ** Figure out the next descriptor
4453                         ** of this frame.
4454                         */
4455                         if (rxr->hw_rsc == TRUE) {
4456                                 rsc = ixgbe_rsc_count(cur);
4457                                 rxr->rsc_num += (rsc - 1);
4458                         }
4459                         if (rsc) { /* Get hardware index */
4460                                 nextp = ((staterr &
4461                                     IXGBE_RXDADV_NEXTP_MASK) >>
4462                                     IXGBE_RXDADV_NEXTP_SHIFT);
4463                         } else { /* Just sequential */
4464                                 nextp = i + 1;
4465                                 if (nextp == adapter->num_rx_desc)
4466                                         nextp = 0;
4467                         }
4468                         nbuf = &rxr->rx_buffers[nextp];
4469                         prefetch(nbuf);
4470                 }
4471                 /*
4472                 ** Rather than using the fmp/lmp global pointers
4473                 ** we now keep the head of a packet chain in the
4474                 ** buffer struct and pass this along from one
4475                 ** descriptor to the next, until we get EOP.
4476                 */
4477                 mp->m_len = len;
4478                 /*
4479                 ** See if there is a stored head; if so, this
4480                 ** buffer continues an in-progress packet chain.
4481                 */
4482                 sendmp = rbuf->fmp;
4483                 if (sendmp != NULL) {  /* secondary frag */
4484                         rbuf->buf = rbuf->fmp = NULL;
4485                         mp->m_flags &= ~M_PKTHDR;
4486                         sendmp->m_pkthdr.len += mp->m_len;
4487                 } else {
4488                         /*
4489                          * Optimize.  This might be a small packet,
4490                          * maybe just a TCP ACK.  Do a fast copy that
4491                          * is cache aligned into a new mbuf, and
4492                          * leave the old mbuf+cluster for re-use.
4493                          */
4494                         if (eop && len <= IXGBE_RX_COPY_LEN) {
4495                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4496                                 if (sendmp != NULL) {
4497                                         sendmp->m_data +=
4498                                             IXGBE_RX_COPY_ALIGN;
4499                                         ixgbe_bcopy(mp->m_data,
4500                                             sendmp->m_data, len);
4501                                         sendmp->m_len = len;
4502                                         rxr->rx_copies++;
4503                                         rbuf->flags |= IXGBE_RX_COPY;
4504                                 }
4505                         }
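                             /*
                             ** Note (a reading of the RX_COPY scheme,
                             ** illustrative): on a successful copy
                             ** rbuf->buf is left loaded and the
                             ** IXGBE_RX_COPY flag is set, so the
                             ** refresh path can reuse the still-mapped
                             ** cluster instead of allocating and
                             ** mapping a new one.
                             */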
4506                         if (sendmp == NULL) {
4507                                 rbuf->buf = rbuf->fmp = NULL;
4508                                 sendmp = mp;
4509                         }
4510
4511                         /* first descriptor of a (non packet-split) chain */
4512                         sendmp->m_flags |= M_PKTHDR;
4513                         sendmp->m_pkthdr.len = mp->m_len;
4514                 }
4515                 ++processed;
4516
4517                 /* Pass the head pointer on */
4518                 if (eop == 0) {
4519                         nbuf->fmp = sendmp;
4520                         sendmp = NULL;
4521                         mp->m_next = nbuf->buf;
4522                 } else { /* Sending this frame */
4523                         sendmp->m_pkthdr.rcvif = ifp;
4524                         ifp->if_ipackets++;
4525                         rxr->rx_packets++;
4526                         /* capture data for AIM */
4527                         rxr->bytes += sendmp->m_pkthdr.len;
4528                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4529                         /* Process vlan info */
4530                         if ((rxr->vtag_strip) &&
4531                             (staterr & IXGBE_RXD_STAT_VP))
4532                                 vtag = le16toh(cur->wb.upper.vlan);
4533                         if (vtag) {
4534                                 sendmp->m_pkthdr.ether_vtag = vtag;
4535                                 sendmp->m_flags |= M_VLANTAG;
4536                         }
4537                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4538                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4539 #if __FreeBSD_version >= 800000
4540                         sendmp->m_pkthdr.flowid = que->msix;
4541                         sendmp->m_flags |= M_FLOWID;
4542 #endif
4543                 }
4544 next_desc:
4545                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4546                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4547
4548                 /* Advance our pointers to the next descriptor. */
4549                 if (++i == rxr->num_desc)
4550                         i = 0;
4551
4552                 /* Now send to the stack or do LRO */
4553                 if (sendmp != NULL) {
4554                         rxr->next_to_check = i;
4555                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4556                         i = rxr->next_to_check;
4557                 }
4558
4559                 /* Every 8 descriptors we refresh the mbufs */
4560                 if (processed == 8) {
4561                         ixgbe_refresh_mbufs(rxr, i);
4562                         processed = 0;
4563                 }
4564         }
4565
4566         /* Refresh any remaining buf structs */
4567         if (ixgbe_rx_unrefreshed(rxr))
4568                 ixgbe_refresh_mbufs(rxr, i);
4569
4570         rxr->next_to_check = i;
4571
4572         /*
4573          * Flush any outstanding LRO work
4574          */
4575         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4576                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4577                 tcp_lro_flush(lro, queued);
4578         }
4579
4580         IXGBE_RX_UNLOCK(rxr);
4581
4582         /*
4583         ** Still have cleaning to do?
4584         */
4585         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
4586                 return (TRUE);
4587         else
4588                 return (FALSE);
4589 }
4590
4591
4592 /*********************************************************************
4593  *
4594  *  Verify that the hardware indicated that the checksum is valid.
4595  *  Inform the stack about the status of the checksum so that the
4596  *  stack doesn't spend time re-verifying it.
4597  *
4598  *********************************************************************/
4599 static void
4600 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4601 {
4602         u16     status = (u16) staterr;
4603         u8      errors = (u8) (staterr >> 24);
4604         bool    sctp = FALSE;
4605
4606         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4607             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4608                 sctp = TRUE;
4609
4610         if (status & IXGBE_RXD_STAT_IPCS) {
4611                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4612                         /* IP Checksum Good */
4613                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4614                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4615
4616                 } else
4617                         mp->m_pkthdr.csum_flags = 0;
4618         }
4619         if (status & IXGBE_RXD_STAT_L4CS) {
4620                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4621 #if __FreeBSD_version >= 800000
4622                 if (sctp)
4623                         type = CSUM_SCTP_VALID;
4624 #endif
4625                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4626                         mp->m_pkthdr.csum_flags |= type;
4627                         if (!sctp)
4628                                 mp->m_pkthdr.csum_data = htons(0xffff);
4629                 } 
4630         }
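             /*
             ** Net effect (illustrative): a clean TCP/UDP packet leaves
             ** here with CSUM_DATA_VALID | CSUM_PSEUDO_HDR set and
             ** csum_data == 0xffff, which tells the stack the L4 checksum
             ** already verified good and needs no software pass.
             */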
4631         return;
4632 }
4633
4634
4635 /*
4636 ** This routine is run via a vlan config EVENT;
4637 ** it enables us to use the HW Filter table since
4638 ** we can get the vlan id. It just creates the
4639 ** entry in the soft version of the VFTA; init
4640 ** will repopulate the real table.
4641 */
4642 static void
4643 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4644 {
4645         struct adapter  *adapter = ifp->if_softc;
4646         u16             index, bit;
4647
4648         if (ifp->if_softc !=  arg)   /* Not our event */
4649                 return;
4650
4651         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4652                 return;
4653
4654         IXGBE_CORE_LOCK(adapter);
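             /*
             ** Worked example (illustrative): the 4096-bit VFTA is kept
             ** as 128 32-bit words, so vtag 1234 lands in word
             ** (1234 >> 5) & 0x7F == 38 at bit 1234 & 0x1F == 18.
             */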
4655         index = (vtag >> 5) & 0x7F;
4656         bit = vtag & 0x1F;
4657         adapter->shadow_vfta[index] |= (1 << bit);
4658         ++adapter->num_vlans;
4659         ixgbe_init_locked(adapter);
4660         IXGBE_CORE_UNLOCK(adapter);
4661 }
4662
4663 /*
4664 ** This routine is run via a vlan
4665 ** unconfig EVENT; it removes our
4666 ** entry from the soft vfta.
4667 */
4668 static void
4669 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4670 {
4671         struct adapter  *adapter = ifp->if_softc;
4672         u16             index, bit;
4673
4674         if (ifp->if_softc !=  arg)
4675                 return;
4676
4677         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4678                 return;
4679
4680         IXGBE_CORE_LOCK(adapter);
4681         index = (vtag >> 5) & 0x7F;
4682         bit = vtag & 0x1F;
4683         adapter->shadow_vfta[index] &= ~(1 << bit);
4684         --adapter->num_vlans;
4685         /* Re-init to load the changes */
4686         ixgbe_init_locked(adapter);
4687         IXGBE_CORE_UNLOCK(adapter);
4688 }
4689
4690 static void
4691 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4692 {
4693         struct ifnet    *ifp = adapter->ifp;
4694         struct ixgbe_hw *hw = &adapter->hw;
4695         struct rx_ring  *rxr;
4696         u32             ctrl;
4697
4698
4699         /*
4700         ** We get here through init_locked, meaning
4701         ** a soft reset; that has already cleared
4702         ** the VFTA and other state, so if no vlans
4703         ** have been registered there is nothing to do.
4704         */
4705         if (adapter->num_vlans == 0)
4706                 return;
4707
4708         /*
4709         ** A soft reset zeroes out the VFTA, so
4710         ** we need to repopulate it now.
4711         */
4712         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4713                 if (adapter->shadow_vfta[i] != 0)
4714                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4715                             adapter->shadow_vfta[i]);
4716
4717         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4718         /* Enable the VLAN filter table if the capability is enabled */
4719         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4720                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4721                 ctrl |= IXGBE_VLNCTRL_VFE;
4722         }
4723         if (hw->mac.type == ixgbe_mac_82598EB)
4724                 ctrl |= IXGBE_VLNCTRL_VME;
4725         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4726
4727         /* Setup the queues for vlans */
4728         for (int i = 0; i < adapter->num_queues; i++) {
4729                 rxr = &adapter->rx_rings[i];
4730                 /* On 82599 the VLAN enable is per/queue in RXDCTL */
4731                 if (hw->mac.type != ixgbe_mac_82598EB) {
4732                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4733                         ctrl |= IXGBE_RXDCTL_VME;
4734                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4735                 }
4736                 rxr->vtag_strip = TRUE;
4737         }
4738 }
4739
4740 static void
4741 ixgbe_enable_intr(struct adapter *adapter)
4742 {
4743         struct ixgbe_hw *hw = &adapter->hw;
4744         struct ix_queue *que = adapter->queues;
4745         u32             mask, fwsm;
4746
4747         mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4748         /* Enable Fan Failure detection */
4749         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4750                     mask |= IXGBE_EIMS_GPI_SDP1;
4751
4752         switch (adapter->hw.mac.type) {
4753                 case ixgbe_mac_82599EB:
4754                         mask |= IXGBE_EIMS_ECC;
4755                         mask |= IXGBE_EIMS_GPI_SDP0;
4756                         mask |= IXGBE_EIMS_GPI_SDP1;
4757                         mask |= IXGBE_EIMS_GPI_SDP2;
4758 #ifdef IXGBE_FDIR
4759                         mask |= IXGBE_EIMS_FLOW_DIR;
4760 #endif
4761                         break;
4762                 case ixgbe_mac_X540:
4763                         mask |= IXGBE_EIMS_ECC;
4764                         /* Detect if Thermal Sensor is enabled */
4765                         fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
4766                         if (fwsm & IXGBE_FWSM_TS_ENABLED)
4767                                 mask |= IXGBE_EIMS_TS;
4768 #ifdef IXGBE_FDIR
4769                         mask |= IXGBE_EIMS_FLOW_DIR;
4770 #endif
4771                 /* falls through */
4772                 default:
4773                         break;
4774         }
4775
4776         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4777
4778         /* With RSS we use auto clear */
4779         if (adapter->msix_mem) {
4780                 mask = IXGBE_EIMS_ENABLE_MASK;
4781                 /* Don't autoclear Link */
4782                 mask &= ~IXGBE_EIMS_OTHER;
4783                 mask &= ~IXGBE_EIMS_LSC;
4784                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4785         }
4786
4787         /*
4788         ** Now enable all queues, this is done separately to
4789         ** allow for handling the extended (beyond 32) MSIX
4790         ** vectors that can be used by 82599
4791         */
4792         for (int i = 0; i < adapter->num_queues; i++, que++)
4793                 ixgbe_enable_queue(adapter, que->msix);
4794
4795         IXGBE_WRITE_FLUSH(hw);
4796
4797         return;
4798 }
4799
4800 static void
4801 ixgbe_disable_intr(struct adapter *adapter)
4802 {
4803         if (adapter->msix_mem)
4804                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4805         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4806                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4807         } else {
4808                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4809                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4810                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4811         }
4812         IXGBE_WRITE_FLUSH(&adapter->hw);
4813         return;
4814 }
4815
4816 u16
4817 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4818 {
4819         u16 value;
4820
4821         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4822             reg, 2);
4823
4824         return (value);
4825 }
4826
4827 void
4828 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4829 {
4830         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4831             reg, value, 2);
4832
4833         return;
4834 }
4835
4836 /*
4837 ** Get the width and transaction speed of
4838 ** the slot this adapter is plugged into.
4839 */
4840 static void
4841 ixgbe_get_slot_info(struct ixgbe_hw *hw)
4842 {
4843         device_t                dev = ((struct ixgbe_osdep *)hw->back)->dev;
4844         struct ixgbe_mac_info   *mac = &hw->mac;
4845         u16                     link;
4846         u32                     offset;
4847
4848         /* For most devices simply call the shared code routine */
4849         if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
4850                 ixgbe_get_bus_info(hw);
4851                 goto display;
4852         }
4853
4854         /*
4855         ** For the Quad port adapter we need to parse back
4856         ** up the PCI tree to find the speed of the expansion
4857         ** slot into which this adapter is plugged. A bit more work.
4858         */
4859         dev = device_get_parent(device_get_parent(dev));
4860 #ifdef IXGBE_DEBUG
4861         device_printf(dev, "parent pcib = %x,%x,%x\n",
4862             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4863 #endif
4864         dev = device_get_parent(device_get_parent(dev));
4865 #ifdef IXGBE_DEBUG
4866         device_printf(dev, "slot pcib = %x,%x,%x\n",
4867             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4868 #endif
4869         /* Now get the PCI Express Capabilities offset */
4870         pci_find_cap(dev, PCIY_EXPRESS, &offset);
4871         /* ...and read the Link Status Register */
4872         link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
4873         switch (link & IXGBE_PCI_LINK_WIDTH) {
4874         case IXGBE_PCI_LINK_WIDTH_1:
4875                 hw->bus.width = ixgbe_bus_width_pcie_x1;
4876                 break;
4877         case IXGBE_PCI_LINK_WIDTH_2:
4878                 hw->bus.width = ixgbe_bus_width_pcie_x2;
4879                 break;
4880         case IXGBE_PCI_LINK_WIDTH_4:
4881                 hw->bus.width = ixgbe_bus_width_pcie_x4;
4882                 break;
4883         case IXGBE_PCI_LINK_WIDTH_8:
4884                 hw->bus.width = ixgbe_bus_width_pcie_x8;
4885                 break;
4886         default:
4887                 hw->bus.width = ixgbe_bus_width_unknown;
4888                 break;
4889         }
4890
4891         switch (link & IXGBE_PCI_LINK_SPEED) {
4892         case IXGBE_PCI_LINK_SPEED_2500:
4893                 hw->bus.speed = ixgbe_bus_speed_2500;
4894                 break;
4895         case IXGBE_PCI_LINK_SPEED_5000:
4896                 hw->bus.speed = ixgbe_bus_speed_5000;
4897                 break;
4898         case IXGBE_PCI_LINK_SPEED_8000:
4899                 hw->bus.speed = ixgbe_bus_speed_8000;
4900                 break;
4901         default:
4902                 hw->bus.speed = ixgbe_bus_speed_unknown;
4903                 break;
4904         }
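             /*
             ** Illustrative decode, assuming the standard PCIe LNKSTA
             ** layout (speed in bits 3:0, negotiated width in bits 9:4):
             ** link == 0x0081 would report Width x8 at 2.5GT/s, i.e. a
             ** Gen1 x8 slot.
             */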
4905
4906         mac->ops.set_lan_id(hw);
4907
4908 display:
4909         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
4910             ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
4911             (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
4912             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
4913             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
4914             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
4915             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
4916             ("Unknown"));
4917
4918         if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4919             ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
4920             (hw->bus.speed == ixgbe_bus_speed_2500))) {
4921                 device_printf(dev, "PCI-Express bandwidth available"
4922                     " for this card\n     is not sufficient for"
4923                     " optimal performance.\n");
4924                 device_printf(dev, "For optimal performance a x8 "
4925                     "PCIE, or x4 PCIE Gen2 slot is required.\n");
4926         }
4927         if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4928             ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
4929             (hw->bus.speed < ixgbe_bus_speed_8000))) {
4930                 device_printf(dev, "PCI-Express bandwidth available"
4931                     " for this card\n     is not sufficient for"
4932                     " optimal performance.\n");
4933                 device_printf(dev, "For optimal performance a x8 "
4934                     "PCIE Gen3 slot is required.\n");
4935         }
4936
4937         return;
4938 }
4939
4940
4941 /*
4942 ** Setup the correct IVAR register for a particular MSIX interrupt
4943 **   (yes this is all very magic and confusing :)
4944 **  - entry is the register array entry
4945 **  - vector is the MSIX vector for this queue
4946 **  - type is RX/TX/MISC
4947 */
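     /*
     ** Worked example (illustrative, following the 82599/X540 arm
     ** below): RX queue 3 is entry 3 with type 0, so it lives in
     ** IVAR(3 >> 1) = IVAR(1) at bit offset (16 * (3 & 1)) + (8 * 0)
     ** = 16 (byte lane 2); the matching TX entry (type 1) lands at
     ** offset 24.
     */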
4948 static void
4949 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4950 {
4951         struct ixgbe_hw *hw = &adapter->hw;
4952         u32 ivar, index;
4953
4954         vector |= IXGBE_IVAR_ALLOC_VAL;
4955
4956         switch (hw->mac.type) {
4957
4958         case ixgbe_mac_82598EB:
4959                 if (type == -1)
4960                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4961                 else
4962                         entry += (type * 64);
4963                 index = (entry >> 2) & 0x1F;
4964                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4965                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4966                 ivar |= (vector << (8 * (entry & 0x3)));
4967                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
4968                 break;
4969
4970         case ixgbe_mac_82599EB:
4971         case ixgbe_mac_X540:
4972                 if (type == -1) { /* MISC IVAR */
4973                         index = (entry & 1) * 8;
4974                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4975                         ivar &= ~(0xFF << index);
4976                         ivar |= (vector << index);
4977                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4978                 } else {        /* RX/TX IVARS */
4979                         index = (16 * (entry & 1)) + (8 * type);
4980                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4981                         ivar &= ~(0xFF << index);
4982                         ivar |= (vector << index);
4983                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4984                 }
                     break;
4985
4986         default:
4987                 break;
4988         }
4989 }
4990
4991 static void
4992 ixgbe_configure_ivars(struct adapter *adapter)
4993 {
4994         struct  ix_queue *que = adapter->queues;
4995         u32 newitr;
4996
4997         if (ixgbe_max_interrupt_rate > 0)
4998                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
4999         else
5000                 newitr = 0;
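             /*
             ** Round-trip sanity check (illustrative): a cap of 8000
             ** interrupts/sec gives newitr = (4000000 / 8000) & 0x0FF8
             ** = 0x1F0; the interrupt_rate sysctl handler later reads
             ** that back as usec = 0x1F0 >> 3 = 62 and reports
             ** 500000 / 62 ~= 8064.
             */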
5001
5002         for (int i = 0; i < adapter->num_queues; i++, que++) {
5003                 /* First the RX queue entry */
5004                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5005                 /* ... and the TX */
5006                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5007                 /* Set an Initial EITR value */
5008                 IXGBE_WRITE_REG(&adapter->hw,
5009                     IXGBE_EITR(que->msix), newitr);
5010         }
5011
5012         /* For the Link interrupt */
5013         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5014 }
5015
5016 /*
5017 ** ixgbe_sfp_probe - called from the local timer to
5018 ** determine if a port has had optics inserted.
5019 */
5020 static bool ixgbe_sfp_probe(struct adapter *adapter)
5021 {
5022         struct ixgbe_hw *hw = &adapter->hw;
5023         device_t        dev = adapter->dev;
5024         bool            result = FALSE;
5025
5026         if ((hw->phy.type == ixgbe_phy_nl) &&
5027             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5028                 s32 ret = hw->phy.ops.identify_sfp(hw);
5029                 if (ret)
5030                         goto out;
5031                 ret = hw->phy.ops.reset(hw);
5032                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5033                         device_printf(dev,"Unsupported SFP+ module detected!");
5034                         printf(" Reload driver with supported module.\n");
5035                         adapter->sfp_probe = FALSE;
5036                         goto out;
5037                 } else
5038                         device_printf(dev,"SFP+ module detected!\n");
5039                 /* We now have supported optics */
5040                 adapter->sfp_probe = FALSE;
5041                 /* Set the optics type so the system reports correctly */
5042                 ixgbe_setup_optics(adapter);
5043                 result = TRUE;
5044         }
5045 out:
5046         return (result);
5047 }
5048
5049 /*
5050 ** Tasklet handler for MSIX Link interrupts
5051 **  - done outside the interrupt context since it might sleep
5052 */
5053 static void
5054 ixgbe_handle_link(void *context, int pending)
5055 {
5056         struct adapter  *adapter = context;
5057
5058         ixgbe_check_link(&adapter->hw,
5059             &adapter->link_speed, &adapter->link_up, 0);
5060         ixgbe_update_link_status(adapter);
5061 }
5062
5063 /*
5064 ** Tasklet for handling SFP module interrupts
5065 */
5066 static void
5067 ixgbe_handle_mod(void *context, int pending)
5068 {
5069         struct adapter  *adapter = context;
5070         struct ixgbe_hw *hw = &adapter->hw;
5071         device_t        dev = adapter->dev;
5072         u32 err;
5073
5074         err = hw->phy.ops.identify_sfp(hw);
5075         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5076                 device_printf(dev,
5077                     "Unsupported SFP+ module type was detected.\n");
5078                 return;
5079         }
5080         err = hw->mac.ops.setup_sfp(hw);
5081         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5082                 device_printf(dev,
5083                     "Setup failure - unsupported SFP+ module type.\n");
5084                 return;
5085         }
5086         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5087         return;
5088 }
5089
5090
5091 /*
5092 ** Tasklet for handling MSF (multispeed fiber) interrupts
5093 */
5094 static void
5095 ixgbe_handle_msf(void *context, int pending)
5096 {
5097         struct adapter  *adapter = context;
5098         struct ixgbe_hw *hw = &adapter->hw;
5099         u32 autoneg;
5100         bool negotiate;
5101
5102         autoneg = hw->phy.autoneg_advertised;
5103         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5104                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5105         if (hw->mac.ops.setup_link)
5106                 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5107         return;
5108 }
5109
5110 #ifdef IXGBE_FDIR
5111 /*
5112 ** Tasklet for reinitializing the Flow Director filter table
5113 */
5114 static void
5115 ixgbe_reinit_fdir(void *context, int pending)
5116 {
5117         struct adapter  *adapter = context;
5118         struct ifnet   *ifp = adapter->ifp;
5119
5120         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5121                 return;
5122         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5123         adapter->fdir_reinit = 0;
5124         /* re-enable flow director interrupts */
5125         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5126         /* Restart the interface */
5127         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5128         return;
5129 }
5130 #endif
5131
5132 /**********************************************************************
5133  *
5134  *  Update the board statistics counters.
5135  *
5136  **********************************************************************/
5137 static void
5138 ixgbe_update_stats_counters(struct adapter *adapter)
5139 {
5140         struct ifnet   *ifp = adapter->ifp;
5141         struct ixgbe_hw *hw = &adapter->hw;
5142         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5143         u64  total_missed_rx = 0;
5144
5145         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5146         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5147         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5148         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5149
5150         /*
5151         ** Note: these are for the 8 possible traffic classes,
5152         **       which the current implementation does not use,
5153         **       therefore only index 0 should show real data.
5154         */
5155         for (int i = 0; i < 8; i++) {
5156                 u32 mp;
5157                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5158                 /* missed_rx tallies misses for the gprc workaround */
5159                 missed_rx += mp;
5160                 /* global running total per traffic class */
5161                 adapter->stats.mpc[i] += mp;
5162                 /* Running comprehensive total for stats display */
5163                 total_missed_rx += adapter->stats.mpc[i];
5164                 if (hw->mac.type == ixgbe_mac_82598EB) {
5165                         adapter->stats.rnbc[i] +=
5166                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5167                         adapter->stats.qbtc[i] +=
5168                             IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5169                         adapter->stats.qbrc[i] +=
5170                             IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5171                         adapter->stats.pxonrxc[i] +=
5172                             IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5173                 } else
5174                         adapter->stats.pxonrxc[i] +=
5175                             IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5176                 adapter->stats.pxontxc[i] +=
5177                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5178                 adapter->stats.pxofftxc[i] +=
5179                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5180                 adapter->stats.pxoffrxc[i] +=
5181                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5182                 adapter->stats.pxon2offc[i] +=
5183                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5184         }
5185         for (int i = 0; i < 16; i++) {
5186                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5187                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5188                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5189         }
5190         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5191         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5192         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5193
5194         /* Hardware workaround: gprc incorrectly includes missed packets */
5195         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5196         adapter->stats.gprc -= missed_rx;
5197
5198         if (hw->mac.type != ixgbe_mac_82598EB) {
5199                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5200                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5201                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5202                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5203                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5204                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5205                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5206                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5207         } else {
5208                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5209                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5210                 /* 82598 only has a counter in the high register */
5211                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5212                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5213                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5214         }
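             /*
             ** Note: the statistics registers read in this routine are
             ** clear-on-read on this hardware family, which is why each
             ** value is accumulated (+=) into the soft copy rather than
             ** assigned.
             */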
5215
5216         /*
5217          * Workaround: the mprc counter incorrectly includes
5218          * broadcasts, so for now we subtract those.
5219          */
5220         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5221         adapter->stats.bprc += bprc;
5222         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5223         if (hw->mac.type == ixgbe_mac_82598EB)
5224                 adapter->stats.mprc -= bprc;
5225
5226         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5227         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5228         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5229         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5230         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5231         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5232
5233         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5234         adapter->stats.lxontxc += lxon;
5235         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5236         adapter->stats.lxofftxc += lxoff;
5237         total = lxon + lxoff;
5238
5239         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5240         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5241         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5242         adapter->stats.gptc -= total;
5243         adapter->stats.mptc -= total;
5244         adapter->stats.ptc64 -= total;
5245         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5246
5247         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5248         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5249         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5250         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5251         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5252         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5253         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5254         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5255         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5256         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5257         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5258         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5259         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5260         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5261         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5262         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5263         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5264         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5265         /* Only read FCOE stats on non-82598 devices */
5266         if (hw->mac.type != ixgbe_mac_82598EB) {
5267                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5268                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5269                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5270                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5271                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5272         }
5273
5274         /* Fill out the OS statistics structure */
5275         ifp->if_ipackets = adapter->stats.gprc;
5276         ifp->if_opackets = adapter->stats.gptc;
5277         ifp->if_ibytes = adapter->stats.gorc;
5278         ifp->if_obytes = adapter->stats.gotc;
5279         ifp->if_imcasts = adapter->stats.mprc;
5280         ifp->if_omcasts = adapter->stats.mptc;
5281         ifp->if_collisions = 0;
5282
5283         /* Rx Errors */
5284         ifp->if_iqdrops = total_missed_rx;
5285         ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
5286 }
5287
5288 /** ixgbe_sysctl_tdh_handler - Handler function
5289  *  Retrieves the TDH value from the hardware
5290  */
5291 static int 
5292 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5293 {
5294         int error;
5295
5296         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5297         if (!txr) return 0;
5298
5299         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5300         error = sysctl_handle_int(oidp, &val, 0, req);
5301         if (error || !req->newptr)
5302                 return error;
5303         return 0;
5304 }
5305
5306 /** ixgbe_sysctl_tdt_handler - Handler function
5307  *  Retrieves the TDT value from the hardware
5308  */
5309 static int 
5310 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5311 {
5312         int error;
5313
5314         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5315         if (!txr) return 0;
5316
5317         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5318         error = sysctl_handle_int(oidp, &val, 0, req);
5319         if (error || !req->newptr)
5320                 return error;
5321         return 0;
5322 }
5323
5324 /** ixgbe_sysctl_rdh_handler - Handler function
5325  *  Retrieves the RDH value from the hardware
5326  */
5327 static int 
5328 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5329 {
5330         int error;
5331
5332         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5333         if (!rxr) return 0;
5334
5335         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5336         error = sysctl_handle_int(oidp, &val, 0, req);
5337         if (error || !req->newptr)
5338                 return error;
5339         return 0;
5340 }
5341
5342 /** ixgbe_sysctl_rdt_handler - Handler function
5343  *  Retrieves the RDT value from the hardware
5344  */
5345 static int 
5346 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5347 {
5348         int error;
5349
5350         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5351         if (!rxr) return 0;
5352
5353         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5354         error = sysctl_handle_int(oidp, &val, 0, req);
5355         if (error || !req->newptr)
5356                 return error;
5357         return 0;
5358 }
5359
5360 static int
5361 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5362 {
5363         int error;
5364         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5365         unsigned int reg, usec, rate;
5366
5367         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5368         usec = ((reg & 0x0FF8) >> 3);
5369         if (usec > 0)
5370                 rate = 500000 / usec;
5371         else
5372                 rate = 0;
5373         error = sysctl_handle_int(oidp, &rate, 0, req);
5374         if (error || !req->newptr)
5375                 return error;
5376         reg &= ~0xfff; /* default, no limitation */
5377         ixgbe_max_interrupt_rate = 0;
5378         if (rate > 0 && rate < 500000) {
5379                 if (rate < 1000)
5380                         rate = 1000;
5381                 ixgbe_max_interrupt_rate = rate;
5382                 reg |= ((4000000/rate) & 0xff8 );
5383         }
5384         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5385         return 0;
5386 }
5387
5388 /*
5389  * Add sysctl variables, one per statistic, to the system.
5390  */
5391 static void
5392 ixgbe_add_hw_stats(struct adapter *adapter)
5393 {
5394
5395         device_t dev = adapter->dev;
5396
5397         struct tx_ring *txr = adapter->tx_rings;
5398         struct rx_ring *rxr = adapter->rx_rings;
5399
5400         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5401         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5402         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5403         struct ixgbe_hw_stats *stats = &adapter->stats;
5404
5405         struct sysctl_oid *stat_node, *queue_node;
5406         struct sysctl_oid_list *stat_list, *queue_list;
5407
5408 #define QUEUE_NAME_LEN 32
5409         char namebuf[QUEUE_NAME_LEN];
5410
5411         /* Driver Statistics */
5412         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5413                         CTLFLAG_RD, &adapter->dropped_pkts,
5414                         "Driver dropped packets");
5415         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5416                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5417                         "m_defrag() failed");
5418         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5419                         CTLFLAG_RD, &adapter->watchdog_events,
5420                         "Watchdog timeouts");
5421         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5422                         CTLFLAG_RD, &adapter->link_irq,
5423                         "Link MSIX IRQ Handled");
5424
5425         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5426                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5427                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5428                                             CTLFLAG_RD, NULL, "Queue Name");
5429                 queue_list = SYSCTL_CHILDREN(queue_node);
5430
5431                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5432                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5433                                 sizeof(&adapter->queues[i]),
5434                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5435                                 "Interrupt Rate");
5436                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5437                                 CTLFLAG_RD, &(adapter->queues[i].irqs),
5438                                 "irqs on this queue");
5439                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5440                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5441                                 ixgbe_sysctl_tdh_handler, "IU",
5442                                 "Transmit Descriptor Head");
5443                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5444                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5445                                 ixgbe_sysctl_tdt_handler, "IU",
5446                                 "Transmit Descriptor Tail");
5447                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
5448                                 CTLFLAG_RD, &txr->tso_tx,
5449                                 "TSO");
5450                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
5451                                 CTLFLAG_RD, &txr->no_tx_dma_setup,
5452                                 "Driver tx dma failure in xmit");
5453                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5454                                 CTLFLAG_RD, &txr->no_desc_avail,
5455                                 "Queue No Descriptor Available");
5456                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5457                                 CTLFLAG_RD, &txr->total_packets,
5458                                 "Queue Packets Transmitted");
5459         }
5460
5461         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5462                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5463                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5464                                             CTLFLAG_RD, NULL, "Queue Name");
5465                 queue_list = SYSCTL_CHILDREN(queue_node);
5466
5467                 struct lro_ctrl *lro = &rxr->lro;
5468
5473
5474                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5475                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5476                                 ixgbe_sysctl_rdh_handler, "IU",
5477                                 "Receive Descriptor Head");
5478                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5479                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5480                                 ixgbe_sysctl_rdt_handler, "IU",
5481                                 "Receive Descriptor Tail");
5482                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5483                                 CTLFLAG_RD, &rxr->rx_packets,
5484                                 "Queue Packets Received");
5485                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5486                                 CTLFLAG_RD, &rxr->rx_bytes,
5487                                 "Queue Bytes Received");
5488                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
5489                                 CTLFLAG_RD, &rxr->rx_copies,
5490                                 "Copied RX Frames");
5491                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5492                                 CTLFLAG_RD, &lro->lro_queued, 0,
5493                                 "LRO Queued");
5494                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5495                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5496                                 "LRO Flushed");
5497         }
5498
5499         /* MAC stats get their own sub node */
5500
5501         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5502                                     CTLFLAG_RD, NULL, "MAC Statistics");
5503         stat_list = SYSCTL_CHILDREN(stat_node);
5504
5505         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5506                         CTLFLAG_RD, &stats->crcerrs,
5507                         "CRC Errors");
5508         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5509                         CTLFLAG_RD, &stats->illerrc,
5510                         "Illegal Byte Errors");
5511         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5512                         CTLFLAG_RD, &stats->errbc,
5513                         "Byte Errors");
5514         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5515                         CTLFLAG_RD, &stats->mspdc,
5516                         "MAC Short Packets Discarded");
5517         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5518                         CTLFLAG_RD, &stats->mlfc,
5519                         "MAC Local Faults");
5520         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5521                         CTLFLAG_RD, &stats->mrfc,
5522                         "MAC Remote Faults");
5523         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5524                         CTLFLAG_RD, &stats->rlec,
5525                         "Receive Length Errors");
5526
5527         /* Flow Control stats */
5528         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5529                         CTLFLAG_RD, &stats->lxontxc,
5530                         "Link XON Transmitted");
5531         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5532                         CTLFLAG_RD, &stats->lxonrxc,
5533                         "Link XON Received");
5534         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5535                         CTLFLAG_RD, &stats->lxofftxc,
5536                         "Link XOFF Transmitted");
5537         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5538                         CTLFLAG_RD, &stats->lxoffrxc,
5539                         "Link XOFF Received");
5540
5541         /* Packet Reception Stats */
5542         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5543                         CTLFLAG_RD, &stats->tor, 
5544                         "Total Octets Received"); 
5545         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5546                         CTLFLAG_RD, &stats->gorc, 
5547                         "Good Octets Received"); 
5548         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5549                         CTLFLAG_RD, &stats->tpr,
5550                         "Total Packets Received");
5551         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5552                         CTLFLAG_RD, &stats->gprc,
5553                         "Good Packets Received");
5554         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5555                         CTLFLAG_RD, &stats->mprc,
5556                         "Multicast Packets Received");
5557         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5558                         CTLFLAG_RD, &stats->bprc,
5559                         "Broadcast Packets Received");
5560         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5561                         CTLFLAG_RD, &stats->prc64,
5562                         "64 byte frames received");
5563         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5564                         CTLFLAG_RD, &stats->prc127,
5565                         "65-127 byte frames received");
5566         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5567                         CTLFLAG_RD, &stats->prc255,
5568                         "128-255 byte frames received");
5569         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5570                         CTLFLAG_RD, &stats->prc511,
5571                         "256-511 byte frames received");
5572         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5573                         CTLFLAG_RD, &stats->prc1023,
5574                         "512-1023 byte frames received");
5575         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5576                         CTLFLAG_RD, &stats->prc1522,
5577                         "1024-1522 byte frames received");
5578         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5579                         CTLFLAG_RD, &stats->ruc,
5580                         "Receive Undersized");
5581         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5582                         CTLFLAG_RD, &stats->rfc,
5583                         "Fragmented Packets Received");
5584         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5585                         CTLFLAG_RD, &stats->roc,
5586                         "Oversized Packets Received");
5587         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5588                         CTLFLAG_RD, &stats->rjc,
5589                         "Received Jabber");
5590         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5591                         CTLFLAG_RD, &stats->mngprc,
5592                         "Management Packets Received");
5593         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5594                         CTLFLAG_RD, &stats->mngpdc,
5595                         "Management Packets Dropped");
5596         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5597                         CTLFLAG_RD, &stats->xec,
5598                         "Checksum Errors");
5599
5600         /* Packet Transmission Stats */
5601         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5602                         CTLFLAG_RD, &stats->gotc, 
5603                         "Good Octets Transmitted"); 
5604         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5605                         CTLFLAG_RD, &stats->tpt,
5606                         "Total Packets Transmitted");
5607         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5608                         CTLFLAG_RD, &stats->gptc,
5609                         "Good Packets Transmitted");
5610         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5611                         CTLFLAG_RD, &stats->bptc,
5612                         "Broadcast Packets Transmitted");
5613         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5614                         CTLFLAG_RD, &stats->mptc,
5615                         "Multicast Packets Transmitted");
5616         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5617                         CTLFLAG_RD, &stats->mngptc,
5618                         "Management Packets Transmitted");
5619         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5620                         CTLFLAG_RD, &stats->ptc64,
5621                         "64 byte frames transmitted");
5622         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5623                         CTLFLAG_RD, &stats->ptc127,
5624                         "65-127 byte frames transmitted");
5625         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5626                         CTLFLAG_RD, &stats->ptc255,
5627                         "128-255 byte frames transmitted");
5628         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5629                         CTLFLAG_RD, &stats->ptc511,
5630                         "256-511 byte frames transmitted");
5631         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5632                         CTLFLAG_RD, &stats->ptc1023,
5633                         "512-1023 byte frames transmitted");
5634         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5635                         CTLFLAG_RD, &stats->ptc1522,
5636                         "1024-1522 byte frames transmitted");
5637 }
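/*
 * Example: reading the counters registered above from userland. The
 * sysctl path mirrors the tree built here (per-queue nodes plus the
 * "mac_stats" node); the "ix" unit number is illustrative. A minimal
 * sketch:
 *
 *      uint64_t crcerrs;
 *      size_t len = sizeof(crcerrs);
 *
 *      if (sysctlbyname("dev.ix.0.mac_stats.crc_errs",
 *          &crcerrs, &len, NULL, 0) == 0)
 *              printf("CRC errors: %ju\n", (uintmax_t)crcerrs);
 */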
5638
5639 /*
5640 ** Set flow control using sysctl:
5641 ** Flow control values:
5642 **      0 - off
5643 **      1 - rx pause
5644 **      2 - tx pause
5645 **      3 - full
5646 */
5647 static int
5648 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5649 {
5650         int error, last;
5651         struct adapter *adapter = (struct adapter *) arg1;
5652
5653         last = adapter->fc;
5654         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5655         if ((error) || (req->newptr == NULL))
5656                 return (error);
5657
5658         /* Don't bother if it's not changed */
5659         if (adapter->fc == last)
5660                 return (0);
5661
5662         switch (adapter->fc) {
5663                 case ixgbe_fc_rx_pause:
5664                 case ixgbe_fc_tx_pause:
5665                 case ixgbe_fc_full:
5666                         adapter->hw.fc.requested_mode = adapter->fc;
5667                         if (adapter->num_queues > 1)
5668                                 ixgbe_disable_rx_drop(adapter);
5669                         break;
5670                 case ixgbe_fc_none:
5671                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5672                         if (adapter->num_queues > 1)
5673                                 ixgbe_enable_rx_drop(adapter);
5674                         break;
5675                 default:
5676                         adapter->fc = last;
5677                         return (EINVAL);
5678         }
5679         /* Don't autoneg if forcing a value */
5680         adapter->hw.fc.disable_fc_autoneg = TRUE;
5681         ixgbe_fc_enable(&adapter->hw);
5682         return (error);
5683 }
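/*
 * Usage sketch for the handler above (the OID name and unit number are
 * illustrative; the OID itself is registered elsewhere in the driver):
 *
 *      # sysctl dev.ix.0.fc=3      full flow control
 *      # sysctl dev.ix.0.fc=0      off; RX drop enabled when multiqueue
 *
 * Any value outside 0-3 restores the previous setting and the write
 * fails with EINVAL.
 */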
5684
5685 /*
5686 ** Control link advertise speed:
5687 **      1 - advertise only 1G
5688 **      2 - advertise 100Mb
5689 **      3 - advertise normal (1G + 10G)
5690 */
5691 static int
5692 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5693 {
5694         int                     error = 0;
5695         struct adapter          *adapter;
5696         device_t                dev;
5697         struct ixgbe_hw         *hw;
5698         ixgbe_link_speed        speed, last;
5699
5700         adapter = (struct adapter *) arg1;
5701         dev = adapter->dev;
5702         hw = &adapter->hw;
5703         last = adapter->advertise;
5704
5705         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5706         if ((error) || (req->newptr == NULL))
5707                 return (error);
5708
5709         if (adapter->advertise == last) /* no change */
5710                 return (0);
5711
5712         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5713             (hw->phy.multispeed_fiber)))
5714                 return (EINVAL);
5715
5716         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5717                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5718                 return (EINVAL);
5719         }
5720
5721         if (adapter->advertise == 1)
5722                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5723         else if (adapter->advertise == 2)
5724                 speed = IXGBE_LINK_SPEED_100_FULL;
5725         else if (adapter->advertise == 3)
5726                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5727                         IXGBE_LINK_SPEED_10GB_FULL;
5728         else {  /* bogus value */
5729                 adapter->advertise = last;
5730                 return (EINVAL);
5731         }
5732
5733         hw->mac.autotry_restart = TRUE;
5734         hw->mac.ops.setup_link(hw, speed, TRUE);
5735
5736         return (error);
5737 }
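/*
 * Usage sketch (OID name and unit number illustrative): on copper or
 * multispeed-fiber ports the administrator can restrict the advertised
 * speeds, e.g.
 *
 *      # sysctl dev.ix.0.advertise_speed=1     1G only
 *      # sysctl dev.ix.0.advertise_speed=3     normal (1G + 10G)
 *
 * Value 2 (100Mb) is accepted only on X540 MACs, per the check above.
 */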
5738
5739 /*
5740 ** Thermal Shutdown Trigger
5741 **   - cause a Thermal Overtemp IRQ
5742 **   - this now requires firmware enabling
5743 */
5744 static int
5745 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5746 {
5747         int             error, fire = 0;
5748         struct adapter  *adapter = (struct adapter *) arg1;
5749         struct ixgbe_hw *hw = &adapter->hw;
5750
5752         if (hw->mac.type != ixgbe_mac_X540)
5753                 return (0);
5754
5755         error = sysctl_handle_int(oidp, &fire, 0, req);
5756         if ((error) || (req->newptr == NULL))
5757                 return (error);
5758
5759         if (fire) {
5760                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5761                 reg |= IXGBE_EICR_TS;
5762                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5763         }
5764
5765         return (0);
5766 }
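/*
 * Usage sketch (OID name and unit number illustrative): writing any
 * non-zero value sets the thermal sensor bit in EICS, which makes the
 * hardware raise an overtemp interrupt on X540 parts:
 *
 *      # sysctl dev.ix.0.thermal_test=1
 */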
5767
5768 /*
5769 ** Enable the hardware to drop packets when the buffer is
5770 ** full. This is useful with multiqueue, so that no single
5771 ** full queue stalls the entire RX engine. We only enable
5772 ** this when multiqueue is in use AND flow control is
5773 ** disabled.
5774 */
5775 static void
5776 ixgbe_enable_rx_drop(struct adapter *adapter)
5777 {
5778         struct ixgbe_hw *hw = &adapter->hw;
5779
5780         for (int i = 0; i < adapter->num_queues; i++) {
5781                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5782                 srrctl |= IXGBE_SRRCTL_DROP_EN;
5783                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5784         }
5785 }
5786
5787 static void
5788 ixgbe_disable_rx_drop(struct adapter *adapter)
5789 {
5790         struct ixgbe_hw *hw = &adapter->hw;
5791
5792         for (int i = 0; i < adapter->num_queues; i++) {
5793                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5794                 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
5795                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5796         }
5797 }