/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "ixgbe.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char ixgbe_driver_version[] = "2.5.15";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static char    *ixgbe_strings[] = {
        "Intel(R) PRO/10GbE PCI-Express Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t);
static int      ixgbe_attach(device_t);
static int      ixgbe_detach(device_t);
static int      ixgbe_shutdown(device_t);
#ifdef IXGBE_LEGACY_TX
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#else /* ! IXGBE_LEGACY_TX */
static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int      ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     ixgbe_qflush(struct ifnet *);
static void     ixgbe_deferred_mq_start(void *, int);
#endif /* IXGBE_LEGACY_TX */
static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
static void     ixgbe_init(void *);
static void     ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *);
static void     ixgbe_get_slot_info(struct ixgbe_hw *);
static int      ixgbe_allocate_msix(struct adapter *);
static int      ixgbe_allocate_legacy(struct adapter *);
static int      ixgbe_allocate_queues(struct adapter *);
static int      ixgbe_setup_msix(struct adapter *);
static void     ixgbe_free_pci_resources(struct adapter *);
static void     ixgbe_local_timer(void *);
static int      ixgbe_setup_interface(device_t, struct adapter *);
static void     ixgbe_config_link(struct adapter *);

static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int      ixgbe_setup_transmit_structures(struct adapter *);
static void     ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int      ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
static void     ixgbe_setup_hw_rsc(struct rx_ring *);

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void     ixgbe_txeof(struct tx_ring *);
static bool     ixgbe_rxeof(struct ix_queue *);
static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
                    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static int      ixgbe_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      ixgbe_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void     ixgbe_configure_ivars(struct adapter *);
static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void     ixgbe_setup_vlan_hw_support(struct adapter *);
static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                    struct mbuf *, u32);

static void     ixgbe_enable_rx_drop(struct adapter *);
static void     ixgbe_disable_rx_drop(struct adapter *);

/* Support for pluggable optic modules */
static bool     ixgbe_sfp_probe(struct adapter *);
static void     ixgbe_setup_optics(struct adapter *);

/* Legacy (single vector) interrupt handler */
static void     ixgbe_legacy_irq(void *);

/* The MSI/X Interrupt handlers */
static void     ixgbe_msix_que(void *);
static void     ixgbe_msix_link(void *);

/* Deferred interrupt tasklets */
static void     ixgbe_handle_que(void *, int);
static void     ixgbe_handle_link(void *, int);
static void     ixgbe_handle_msf(void *, int);
static void     ixgbe_handle_mod(void *, int);

#ifdef IXGBE_FDIR
static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
static void     ixgbe_reinit_fdir(void *, int);
#endif

/* Missing shared code prototype */
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ixgbe_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, ixgbe_probe),
        DEVMETHOD(device_attach, ixgbe_attach),
        DEVMETHOD(device_detach, ixgbe_detach),
        DEVMETHOD(device_shutdown, ixgbe_shutdown),
        DEVMETHOD_END
};

static driver_t ixgbe_driver = {
        "ix", ixgbe_methods, sizeof(struct adapter),
};

devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
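
/*
** DRIVER_MODULE() registers this driver with the pci bus using the
** name given in ixgbe_driver ("ix"), so ports attach as ix0, ix1,
** and so on; the MODULE_DEPEND() lines tell the loader that the
** pci and ether code must be present first.
*/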

/*
** TUNEABLE PARAMETERS:
*/

static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
                   "IXGBE driver parameters");

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int ixgbe_enable_aim = TRUE;
TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RW, &ixgbe_enable_aim, 0,
    "Enable adaptive interrupt moderation");

static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
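
/*
** For example, assuming IXGBE_LOW_LATENCY is 128 (its value in
** ixgbe.h), the default above works out to 4000000 / 128 = 31250
** interrupts per second per vector. These tunables can be set at
** boot in /boot/loader.conf, e.g.:
**
**      hw.ixgbe.enable_aim=0
**      hw.ixgbe.max_interrupt_rate=31250
**
** and the CTLFLAG_RW ones may also be changed at runtime under the
** sysctl node hw.ix.
*/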

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &ixgbe_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, "
    "-1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &ixgbe_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, "
    "-1 means unlimited");

/*
** Smart speed setting, default to on.
** This only works as a compile option
** right now, as it is set during attach;
** set this to 'ixgbe_smart_speed_off' to
** disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
 * Number of Queues, can be set to 0,
 * it then autoconfigures based on the
 * number of cpus with a max of 8. This
 * can be overridden manually here.
 */
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Number of TX descriptors per ring,
** setting higher than RX as this seems
** the better performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
    "Number of transmit descriptors per queue");

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
    "Number of receive descriptors per queue");

/*
** Turning this on allows the use
** of unsupported SFP+ modules; note
** that in doing so you are on your own :)
*/
static int allow_unsupported_sfp = FALSE;
TUNABLE_INT("hw.ixgbe.unsupported_sfp", &allow_unsupported_sfp);
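
/*
** For example, adding
**
**      hw.ixgbe.unsupported_sfp=1
**
** to /boot/loader.conf makes the shared code accept third-party
** modules (the value is copied to hw->allow_unsupported_sfp in
** attach); doing so is entirely at your own risk.
*/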

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  Also this will cause IP forwarding to
**  fail and that can't be controlled by
**  the stack as LRO can. For all these
**  reasons I've deemed it best to leave
**  this off and not bother with a tuneable
**  interface; enabling it requires a
**  recompile.
*/
static bool ixgbe_rsc_enable = FALSE;

/* Keep running tab on them for sanity check */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool, this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool, this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif

#ifdef DEV_NETMAP
/*
 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
 * be a reference on how to implement netmap support in a driver.
 * Additional comments are in ixgbe_netmap.h .
 *
 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
 * that extend the standard driver.
 */
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines if the driver should be loaded on the
 *  adapter based on the PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
ixgbe_probe(device_t dev)
{
        ixgbe_vendor_info_t *ent;

        u16     pci_vendor_id = 0;
        u16     pci_device_id = 0;
        u16     pci_subvendor_id = 0;
        u16     pci_subdevice_id = 0;
        char    adapter_name[256];

        INIT_DEBUGOUT("ixgbe_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = ixgbe_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                     (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                     (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                ixgbe_strings[ent->index],
                                ixgbe_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        ++ixgbe_total_ports;
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_attach(device_t dev)
{
        struct adapter *adapter;
        struct ixgbe_hw *hw;
        int             error = 0;
        u16             csum;
        u32             ctrl_ext;

        INIT_DEBUGOUT("ixgbe_attach: begin");

        /* Allocate, clear, and link in our adapter structure */
        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;

        /* Core Lock Init */
        IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL APIs */

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "enable_aim", CTLTYPE_INT | CTLFLAG_RW,
                        &ixgbe_enable_aim, 1, "Interrupt Moderation");

        /*
        ** Allow a kind of speed control by forcing the autoneg
        ** advertised speed list to only a certain value, this
        ** supports 1G on 82599 devices, and 100Mb on x540.
        */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_advertise, "I", "Link Speed");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
                        0, ixgbe_set_thermal_test, "I", "Thermal Test");

        /* Set up the timer callout */
        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware revision */
        ixgbe_identify_hardware(adapter);

        /* Do base PCI setup - map BAR0 */
        if (ixgbe_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_out;
        }

        /* Do descriptor calc and sanity checks */
        if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
                device_printf(dev, "TXD config issue, using default!\n");
                adapter->num_tx_desc = DEFAULT_TXD;
        } else
                adapter->num_tx_desc = ixgbe_txd;

        /*
        ** With many RX rings it is easy to exceed the
        ** system mbuf allocation. Tuning nmbclusters
        ** can alleviate this.
        */
        if (nmbclusters > 0) {
                int s;
                s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
                if (s > nmbclusters) {
                        device_printf(dev, "RX Descriptors exceed "
                            "system mbuf max, using default instead!\n");
                        ixgbe_rxd = DEFAULT_RXD;
                }
        }
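
        /*
        ** Rough example of the check above: two ports with eight
        ** queues each and ixgbe_rxd = 2048 would want
        ** 2048 * 8 * 2 = 32768 clusters, so an nmbclusters below
        ** that forces the fallback to DEFAULT_RXD.
        */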

        if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
                device_printf(dev, "RXD config issue, using default!\n");
                adapter->num_rx_desc = DEFAULT_RXD;
        } else
                adapter->num_rx_desc = ixgbe_rxd;

        /* Allocate our TX/RX Queues */
        if (ixgbe_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_out;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Initialize the shared code */
        hw->allow_unsupported_sfp = allow_unsupported_sfp;
        error = ixgbe_init_shared_code(hw);
        if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
                /*
                ** No optics in this port, set up
                ** so the timer routine will probe
                ** for later insertion.
                */
                adapter->sfp_probe = TRUE;
                error = 0;
        } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                device_printf(dev, "Unsupported SFP+ module detected!\n");
                error = EIO;
                goto err_late;
        } else if (error) {
                device_printf(dev, "Unable to initialize the shared code\n");
                error = EIO;
                goto err_late;
        }

        /* Make sure we have a good EEPROM before we read from it */
        if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
                device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
                error = EIO;
                goto err_late;
        }

        error = ixgbe_init_hw(hw);
        switch (error) {
        case IXGBE_ERR_EEPROM_VERSION:
                device_printf(dev, "This device is a pre-production adapter/"
                    "LOM.  Please be aware there may be issues associated "
                    "with your hardware.\n If you are experiencing problems "
                    "please contact your Intel or hardware representative "
                    "who provided you with this hardware.\n");
                break;
        case IXGBE_ERR_SFP_NOT_SUPPORTED:
                device_printf(dev, "Unsupported SFP+ Module\n");
                error = EIO;
                goto err_late;
        case IXGBE_ERR_SFP_NOT_PRESENT:
                device_printf(dev, "No SFP+ Module found\n");
                /* falls thru */
        default:
                break;
        }

        /* Detect and set physical type */
        ixgbe_setup_optics(adapter);

        if ((adapter->msix > 1) && (ixgbe_enable_msix))
                error = ixgbe_allocate_msix(adapter);
        else
                error = ixgbe_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        if (ixgbe_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Initialize statistics */
        ixgbe_update_stats_counters(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /*
        ** Check PCIE slot type/speed/width
        */
        ixgbe_get_slot_info(hw);

        /* Set an initial default flow control value */
        adapter->fc = ixgbe_fc_full;

        /* let hardware know driver is loaded */
        ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
        ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

        ixgbe_add_hw_stats(adapter);

#ifdef DEV_NETMAP
        ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("ixgbe_attach: end");
        return (0);
err_late:
        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
err_out:
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        ixgbe_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_detach(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ix_queue *que = adapter->queues;
        struct tx_ring *txr = adapter->tx_rings;
        u32     ctrl_ext;

        INIT_DEBUGOUT("ixgbe_detach: begin");

        /* Make sure VLANs are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);

        for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
                if (que->tq) {
#ifndef IXGBE_LEGACY_TX
                        taskqueue_drain(que->tq, &txr->txq_task);
#endif
                        taskqueue_drain(que->tq, &que->que_task);
                        taskqueue_free(que->tq);
                }
        }

        /* Drain the Link queue */
        if (adapter->tq) {
                taskqueue_drain(adapter->tq, &adapter->link_task);
                taskqueue_drain(adapter->tq, &adapter->mod_task);
                taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
                taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
                taskqueue_free(adapter->tq);
        }

        /* let hardware know driver is unloading */
        ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
        ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);
#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        ixgbe_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(adapter->ifp);

        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
        free(adapter->mta, M_DEVBUF);

        IXGBE_CORE_LOCK_DESTROY(adapter);
        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
ixgbe_shutdown(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return (0);
}

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct mbuf    *m_head;
        struct adapter *adapter = txr->adapter;

        IXGBE_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                return;
        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
                        break;

                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;

                if (ixgbe_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status = IXGBE_QUEUE_WORKING;
        }
        return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IXGBE_TX_LOCK(txr);
                ixgbe_start_locked(txr, ifp);
                IXGBE_TX_UNLOCK(txr);
        }
        return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
**
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ix_queue *que;
        struct tx_ring  *txr;
        int             i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IXGBE_TX_TRYLOCK(txr)) {
                err = ixgbe_mq_start_locked(ifp, txr);
                IXGBE_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}
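
/*
** Queue selection above: when the stack supplies an RSS flowid the
** ring is flowid % num_queues (e.g. flowid 35 on an 8-queue adapter
** maps to ring 3), which keeps each flow on one ring; without a
** flowid the sending CPU's index is used instead.
*/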

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             enqueued = 0, err = 0;

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
#if __FreeBSD_version < 901504
        next = drbr_dequeue(ifp, txr->br);
        while (next != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
#else
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                drbr_advance(ifp, txr->br);
                        } else {
                                drbr_putback(ifp, txr->br, next);
                        }
#endif
                        break;
                }
#if __FreeBSD_version >= 901504
                drbr_advance(ifp, txr->br);
#endif
                enqueued++;
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
#if __FreeBSD_version < 901504
                next = drbr_dequeue(ifp, txr->br);
#endif
        }

        if (enqueued > 0) {
                /* Set watchdog on */
                txr->queue_status = IXGBE_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
                ixgbe_txeof(txr);

        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
ixgbe_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IXGBE_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                ixgbe_mq_start_locked(ifp, txr);
        IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IXGBE_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IXGBE_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ixgbe_hw *hw = &adapter->hw;
        struct ifreq    *ifr = (struct ifreq *) data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
        bool            avoid_reset = FALSE;
#endif
        int             error = 0;

        switch (command) {

        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
#if defined(INET) || defined(INET6)
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                ixgbe_init(adapter);
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
                        error = ether_ioctl(ifp, command, data);
#endif
                break;
        case SIOCSIFMTU:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
                if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
                        error = EINVAL;
                } else {
                        IXGBE_CORE_LOCK(adapter);
                        ifp->if_mtu = ifr->ifr_mtu;
                        adapter->max_frame_size =
                                ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
                IXGBE_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        ixgbe_set_promisc(adapter);
                                }
                        } else
                                ixgbe_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                ixgbe_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IXGBE_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_disable_intr(adapter);
                        ixgbe_set_multi(adapter);
                        ixgbe_enable_intr(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
        {
                int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
                if (mask & IFCAP_HWCSUM)
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                if (mask & IFCAP_TSO4)
                        ifp->if_capenable ^= IFCAP_TSO4;
                if (mask & IFCAP_TSO6)
                        ifp->if_capenable ^= IFCAP_TSO6;
                if (mask & IFCAP_LRO)
                        ifp->if_capenable ^= IFCAP_LRO;
                if (mask & IFCAP_VLAN_HWTAGGING)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                if (mask & IFCAP_VLAN_HWFILTER)
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                if (mask & IFCAP_VLAN_HWTSO)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                VLAN_CAPABILITIES(ifp);
                break;
        }
        case SIOCGI2C:
        {
                struct ixgbe_i2c_req    i2c;
                IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
                error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
                if (error)
                        break;
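                /*
                ** Only the two SFF-8472 device addresses are valid
                ** here: 0xA0 is the SFP serial ID EEPROM and 0xA2
                ** the diagnostics page.
                */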
                if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
                        error = EINVAL;
                        break;
                }
                hw->phy.ops.read_i2c_byte(hw, i2c.offset,
                    i2c.dev_addr, i2c.data);
                error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
                break;
        }
        default:
                IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
#define IXGBE_MHADD_MFS_SHIFT 16

static void
ixgbe_init_locked(struct adapter *adapter)
{
        struct ifnet   *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        struct ixgbe_hw *hw = &adapter->hw;
        u32             k, txdctl, mhadd, gpie;
        u32             rxdctl, rxctrl;

        mtx_assert(&adapter->core_mtx, MA_OWNED);
        INIT_DEBUGOUT("ixgbe_init_locked: begin");
        hw->adapter_stopped = FALSE;
        ixgbe_stop_adapter(hw);
        callout_stop(&adapter->timer);

        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
              IXGBE_ETH_LENGTH_OF_ADDRESS);
        ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
        hw->addr_ctrl.rar_used_count = 1;

        /* Set the various hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (hw->mac.type != ixgbe_mac_82598EB)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        /* Prepare transmit descriptors and buffers */
        if (ixgbe_setup_transmit_structures(adapter)) {
                device_printf(dev, "Could not setup transmit structures\n");
                ixgbe_stop(adapter);
                return;
        }

        ixgbe_init_hw(hw);
        ixgbe_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        ixgbe_set_multi(adapter);

        /*
        ** Determine the correct mbuf pool
        ** for doing jumbo frames
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else if (adapter->max_frame_size <= 9216)
                adapter->rx_mbuf_sz = MJUM9BYTES;
        else
                adapter->rx_mbuf_sz = MJUM16BYTES;
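
        /*
        ** Worked example: a 9000-byte MTU makes max_frame_size
        ** 9000 + ETHER_HDR_LEN + ETHER_CRC_LEN = 9018 bytes, which
        ** falls into the MJUM9BYTES (9k cluster) bucket above.
        */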

        /* Prepare receive descriptors and buffers */
        if (ixgbe_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                ixgbe_stop(adapter);
                return;
        }

        /* Configure RX settings */
        ixgbe_initialize_receive_units(adapter);

        gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

        /* Enable Fan Failure Interrupt */
        gpie |= IXGBE_SDP1_GPIEN;

        /* Add for Module detection */
        if (hw->mac.type == ixgbe_mac_82599EB)
                gpie |= IXGBE_SDP2_GPIEN;

        /* Thermal Failure Detection */
        if (hw->mac.type == ixgbe_mac_X540)
                gpie |= IXGBE_SDP0_GPIEN;

        if (adapter->msix > 1) {
                /* Enable Enhanced MSIX mode */
                gpie |= IXGBE_GPIE_MSIX_MODE;
                gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
                    IXGBE_GPIE_OCD;
        }
        IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

        /* Set MTU size */
        if (ifp->if_mtu > ETHERMTU) {
                mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
                mhadd &= ~IXGBE_MHADD_MFS_MASK;
                mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
                IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
        }

        /* Now enable all the queues */

        for (int i = 0; i < adapter->num_queues; i++) {
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                /* Set WTHRESH to 8, burst writeback */
                txdctl |= (8 << 16);
                /*
                 * When the internal queue falls below PTHRESH (32),
                 * start prefetching as long as there are at least
                 * HTHRESH (1) buffers ready. The values are taken
                 * from the Intel linux driver 3.8.21.
                 * Prefetching enables tx line rate even with 1 queue.
                 */
                txdctl |= (32 << 0) | (1 << 8);
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
        }

        for (int i = 0; i < adapter->num_queues; i++) {
                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
                if (hw->mac.type == ixgbe_mac_82598EB) {
                        /*
                        ** PTHRESH = 21
                        ** HTHRESH = 4
                        ** WTHRESH = 8
                        */
                        rxdctl &= ~0x3FFFFF;
                        rxdctl |= 0x080420;
                }
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
                for (k = 0; k < 10; k++) {
                        if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
                            IXGBE_RXDCTL_ENABLE)
                                break;
                        else
                                msec_delay(1);
                }
                wmb();
#ifdef DEV_NETMAP
                /*
                 * In netmap mode, we must preserve the buffers made
                 * available to userspace before the if_init()
                 * (this is true by default on the TX side, because
                 * init makes all buffers available to userspace).
                 *
                 * netmap_reset() and the device specific routines
                 * (e.g. ixgbe_setup_receive_rings()) map these
                 * buffers at the end of the NIC ring, so here we
                 * must set the RDT (tail) register to make sure
                 * they are not overwritten.
                 *
                 * In this driver the NIC ring starts at RDH = 0,
                 * RDT points to the last slot available for reception (?),
                 * so RDT = num_rx_desc - 1 means the whole ring is available.
                 */
                if (ifp->if_capenable & IFCAP_NETMAP) {
                        struct netmap_adapter *na = NA(adapter->ifp);
                        struct netmap_kring *kring = &na->rx_rings[i];
                        int t = na->num_rx_desc - 1 - kring->nr_hwavail;

                        IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
                } else
#endif /* DEV_NETMAP */
                IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
        }

        /* Set up VLAN support and filter */
        ixgbe_setup_vlan_hw_support(adapter);

        /* Enable Receive engine */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
        if (hw->mac.type == ixgbe_mac_82598EB)
                rxctrl |= IXGBE_RXCTRL_DMBYPS;
        rxctrl |= IXGBE_RXCTRL_RXEN;
        ixgbe_enable_rx_dma(hw, rxctrl);

        callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

        /* Set up MSI/X routing */
        if (ixgbe_enable_msix) {
                ixgbe_configure_ivars(adapter);
                /* Set up auto-mask */
                if (hw->mac.type == ixgbe_mac_82598EB)
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
                else {
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
                }
        } else {  /* Simple settings for Legacy/MSI */
                ixgbe_set_ivar(adapter, 0, 0, 0);
                ixgbe_set_ivar(adapter, 0, 0, 1);
                IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
        }

#ifdef IXGBE_FDIR
        /* Init Flow director */
        if (hw->mac.type != ixgbe_mac_82598EB) {
                u32 hdrm = 32 << fdir_pballoc;

                hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
                ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
        }
#endif

        /*
        ** Check on any SFP devices that
        ** need to be kick-started
        */
        if (hw->phy.type == ixgbe_phy_none) {
                int err = hw->phy.ops.identify(hw);
                if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                        device_printf(dev,
                            "Unsupported SFP+ module type was detected.\n");
                        return;
                }
        }

        /* Set moderation on the Link interrupt */
        IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);

        /* Config/Enable Link */
        ixgbe_config_link(adapter);

        /* Hardware Packet Buffer & Flow Control setup */
        {
                u32 rxpb, frame, size, tmp;

                frame = adapter->max_frame_size;

                /* Calculate High Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_DV_X540(frame, frame);
                else
                        tmp = IXGBE_DV(frame, frame);
                size = IXGBE_BT2KB(tmp);
                rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
                hw->fc.high_water[0] = rxpb - size;

                /* Now calculate Low Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_LOW_DV_X540(frame);
                else
                        tmp = IXGBE_LOW_DV(frame);
                hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

                hw->fc.requested_mode = adapter->fc;
                hw->fc.pause_time = IXGBE_FC_PAUSE;
                hw->fc.send_xon = TRUE;
        }
        /* Initialize the FC settings */
        ixgbe_start_hw(hw);

        /* And now turn on interrupts */
        ixgbe_enable_intr(adapter);

        /* Now inform the stack we're ready */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;

        return;
}

static void
ixgbe_init(void *arg)
{
        struct adapter *adapter = arg;

        IXGBE_CORE_LOCK(adapter);
        ixgbe_init_locked(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return;
}

/*
**
** MSIX Interrupt Handlers and Tasklets
**
*/

static inline void
ixgbe_enable_queue(struct adapter *adapter, u32 vector)
{
        struct ixgbe_hw *hw = &adapter->hw;
        u64     queue = (u64)1 << vector;       /* 64-bit shift; vector may exceed 31 */
        u32     mask;

        if (hw->mac.type == ixgbe_mac_82598EB) {
                mask = (IXGBE_EIMS_RTX_QUEUE & queue);
                IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
        } else {
                mask = (queue & 0xFFFFFFFF);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
                mask = (queue >> 32);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
        }
}
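
/*
** Mask arithmetic for the helpers above and below: on non-82598
** hardware the 64 possible vectors are split across two registers,
** bits 0-31 in the EIMS_EX(0)/EIMC_EX(0) register and bits 32-63
** in the (1) register. For example, vector 35 sets bit 35 of
** 'queue', so the low 32-bit mask is 0 and the high mask has
** bit 3 set.
*/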

static inline void
ixgbe_disable_queue(struct adapter *adapter, u32 vector)
{
        struct ixgbe_hw *hw = &adapter->hw;
1393         u64     queue = (u64)1 << vector;
1394         u32     mask;
1395
1396         if (hw->mac.type == ixgbe_mac_82598EB) {
1397                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1398                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1399         } else {
1400                 mask = (queue & 0xFFFFFFFF);
1401                 if (mask)
1402                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1403                 mask = (queue >> 32);
1404                 if (mask)
1405                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1406         }
1407 }
1408
1409 static void
1410 ixgbe_handle_que(void *context, int pending)
1411 {
1412         struct ix_queue *que = context;
1413         struct adapter  *adapter = que->adapter;
1414         struct tx_ring  *txr = que->txr;
1415         struct ifnet    *ifp = adapter->ifp;
1416         bool            more;
1417
1418         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1419                 more = ixgbe_rxeof(que);
1420                 IXGBE_TX_LOCK(txr);
1421                 ixgbe_txeof(txr);
1422 #ifndef IXGBE_LEGACY_TX
1423                 if (!drbr_empty(ifp, txr->br))
1424                         ixgbe_mq_start_locked(ifp, txr);
1425 #else
1426                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1427                         ixgbe_start_locked(txr, ifp);
1428 #endif
1429                 IXGBE_TX_UNLOCK(txr);
1430         }
1431
1432         /* Reenable this interrupt */
1433         if (que->res != NULL)
1434                 ixgbe_enable_queue(adapter, que->msix);
1435         else
1436                 ixgbe_enable_intr(adapter);
1437         return;
1438 }
1439
1440
1441 /*********************************************************************
1442  *
1443  *  Legacy Interrupt Service routine
1444  *
1445  **********************************************************************/
1446
1447 static void
1448 ixgbe_legacy_irq(void *arg)
1449 {
1450         struct ix_queue *que = arg;
1451         struct adapter  *adapter = que->adapter;
1452         struct ixgbe_hw *hw = &adapter->hw;
1453         struct ifnet    *ifp = adapter->ifp;
1454         struct          tx_ring *txr = adapter->tx_rings;
1455         bool            more;
1456         u32             reg_eicr;
1457
1458
1459         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1460
1461         ++que->irqs;
1462         if (reg_eicr == 0) {
1463                 ixgbe_enable_intr(adapter);
1464                 return;
1465         }
1466
1467         more = ixgbe_rxeof(que);
1468
1469         IXGBE_TX_LOCK(txr);
1470         ixgbe_txeof(txr);
1471 #ifdef IXGBE_LEGACY_TX
1472         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1473                 ixgbe_start_locked(txr, ifp);
1474 #else
1475         if (!drbr_empty(ifp, txr->br))
1476                 ixgbe_mq_start_locked(ifp, txr);
1477 #endif
1478         IXGBE_TX_UNLOCK(txr);
1479
1480         /* Check for fan failure */
1481         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1482             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1483                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1484                     "REPLACE IMMEDIATELY!!\n");
1485                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1486         }
1487
1488         /* Link status change */
1489         if (reg_eicr & IXGBE_EICR_LSC)
1490                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1491
1492         if (more)
1493                 taskqueue_enqueue(que->tq, &que->que_task);
1494         else
1495                 ixgbe_enable_intr(adapter);
1496         return;
1497 }
1498
1499
1500 /*********************************************************************
1501  *
1502  *  MSIX Queue Interrupt Service routine
1503  *
1504  **********************************************************************/
1505 void
1506 ixgbe_msix_que(void *arg)
1507 {
1508         struct ix_queue *que = arg;
1509         struct adapter  *adapter = que->adapter;
1510         struct ifnet    *ifp = adapter->ifp;
1511         struct tx_ring  *txr = que->txr;
1512         struct rx_ring  *rxr = que->rxr;
1513         bool            more;
1514         u32             newitr = 0;
1515
1516         /* Protect against spurious interrupts */
1517         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1518                 return;
1519
1520         ixgbe_disable_queue(adapter, que->msix);
1521         ++que->irqs;
1522
1523         more = ixgbe_rxeof(que);
1524
1525         IXGBE_TX_LOCK(txr);
1526         ixgbe_txeof(txr);
1527 #ifdef IXGBE_LEGACY_TX
1528         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1529                 ixgbe_start_locked(txr, ifp);
1530 #else
1531         if (!drbr_empty(ifp, txr->br))
1532                 ixgbe_mq_start_locked(ifp, txr);
1533 #endif
1534         IXGBE_TX_UNLOCK(txr);
1535
1536         /* Do AIM now? */
1537
1538         if (ixgbe_enable_aim == FALSE)
1539                 goto no_calc;
1540         /*
1541         ** Do Adaptive Interrupt Moderation:
1542         **  - Write out last calculated setting
1543         **  - Calculate based on average size over
1544         **    the last interval.
1545         */
1546         if (que->eitr_setting)
1547                 IXGBE_WRITE_REG(&adapter->hw,
1548                     IXGBE_EITR(que->msix), que->eitr_setting);
1549  
1550         que->eitr_setting = 0;
1551
1552         /* Idle, do nothing */
1553         if ((txr->bytes == 0) && (rxr->bytes == 0))
1554                 goto no_calc;
1555                                 
1556         if ((txr->bytes) && (txr->packets))
1557                 newitr = txr->bytes/txr->packets;
1558         if ((rxr->bytes) && (rxr->packets))
1559                 newitr = max(newitr,
1560                     (rxr->bytes / rxr->packets));
1561         newitr += 24; /* account for hardware frame, crc */
1562
1563         /* set an upper boundary */
1564         newitr = min(newitr, 3000);
1565
1566         /* Be nice to the mid range */
1567         if ((newitr > 300) && (newitr < 1200))
1568                 newitr = (newitr / 3);
1569         else
1570                 newitr = (newitr / 2);
1571
1572         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1573                 newitr |= newitr << 16;
1574         else
1575                 newitr |= IXGBE_EITR_CNT_WDIS;
1576                  
1577         /* save for next interrupt */
1578         que->eitr_setting = newitr;
1579
1580         /* Reset state */
1581         txr->bytes = 0;
1582         txr->packets = 0;
1583         rxr->bytes = 0;
1584         rxr->packets = 0;
1585
1586 no_calc:
1587         if (more)
1588                 taskqueue_enqueue(que->tq, &que->que_task);
1589         else
1590                 ixgbe_enable_queue(adapter, que->msix);
1591         return;
1592 }
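
/*
** Worked example of the AIM math above (illustrative numbers): an
** average frame of 1500 bytes gives newitr = 1500 + 24 = 1524; that
** is under the 3000 cap and above the 300-1200 mid range, so it is
** halved to 762, OR'd with the counter write-disable bit on
** 82599/X540 class MACs, and written out on the next interrupt.
*/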
1593
1594
1595 static void
1596 ixgbe_msix_link(void *arg)
1597 {
1598         struct adapter  *adapter = arg;
1599         struct ixgbe_hw *hw = &adapter->hw;
1600         u32             reg_eicr;
1601
1602         ++adapter->link_irq;
1603
1604         /* First get the cause */
1605         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1606         /* Be sure the queue bits are not cleared */
1607         reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1608         /* Clear interrupt with write */
1609         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1610
1611         /* Link status change */
1612         if (reg_eicr & IXGBE_EICR_LSC)
1613                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1614
1615         if (hw->mac.type != ixgbe_mac_82598EB) {
1616 #ifdef IXGBE_FDIR
1617                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1618                         /* This is probably overkill :) */
1619                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1620                                 return;
1621                         /* Disable the interrupt */
1622                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1623                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1624                 } else
1625 #endif
1626                 if (reg_eicr & IXGBE_EICR_ECC) {
1627                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1628                             "Please Reboot!!\n");
1629                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1630                 } else
1631
1632                 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1633                         /* Clear the interrupt */
1634                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1635                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1636                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1637                         /* Clear the interrupt */
1638                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1639                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1640                 }
1641         } 
1642
1643         /* Check for fan failure */
1644         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1645             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1646                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1647                     "REPLACE IMMEDIATELY!!\n");
1648                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1649         }
1650
1651         /* Check for over temp condition */
1652         if ((hw->mac.type == ixgbe_mac_X540) &&
1653             (reg_eicr & IXGBE_EICR_TS)) {
1654                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1655                     "PHY IS SHUT DOWN!!\n");
1656                 device_printf(adapter->dev, "System shutdown required\n");
1657                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1658         }
1659
1660         IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1661         return;
1662 }
1663
1664 /*********************************************************************
1665  *
1666  *  Media Ioctl callback
1667  *
1668  *  This routine is called whenever the user queries the status of
1669  *  the interface using ifconfig.
1670  *
1671  **********************************************************************/
1672 static void
1673 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1674 {
1675         struct adapter *adapter = ifp->if_softc;
1676
1677         INIT_DEBUGOUT("ixgbe_media_status: begin");
1678         IXGBE_CORE_LOCK(adapter);
1679         ixgbe_update_link_status(adapter);
1680
1681         ifmr->ifm_status = IFM_AVALID;
1682         ifmr->ifm_active = IFM_ETHER;
1683
1684         if (!adapter->link_active) {
1685                 IXGBE_CORE_UNLOCK(adapter);
1686                 return;
1687         }
1688
1689         ifmr->ifm_status |= IFM_ACTIVE;
1690
1691         switch (adapter->link_speed) {
1692         case IXGBE_LINK_SPEED_100_FULL:
1693                 ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1694                 break;
1695         case IXGBE_LINK_SPEED_1GB_FULL:
1696                 ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1697                 break;
1698         case IXGBE_LINK_SPEED_10GB_FULL:
1699                 ifmr->ifm_active |= adapter->optics | IFM_FDX;
1700                 break;
1701         }
1702
1703         IXGBE_CORE_UNLOCK(adapter);
1704
1705         return;
1706 }
1707
1708 /*********************************************************************
1709  *
1710  *  Media Ioctl callback
1711  *
1712  *  This routine is called when the user changes speed/duplex using
1713  *  media/mediopt option with ifconfig.
1714  *  the media/mediaopt options with ifconfig.
1715  **********************************************************************/
1716 static int
1717 ixgbe_media_change(struct ifnet * ifp)
1718 {
1719         struct adapter *adapter = ifp->if_softc;
1720         struct ifmedia *ifm = &adapter->media;
1721
1722         INIT_DEBUGOUT("ixgbe_media_change: begin");
1723
1724         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1725                 return (EINVAL);
1726
1727         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1728         case IFM_AUTO:
1729                 adapter->hw.phy.autoneg_advertised =
1730                     IXGBE_LINK_SPEED_100_FULL |
1731                     IXGBE_LINK_SPEED_1GB_FULL |
1732                     IXGBE_LINK_SPEED_10GB_FULL;
1733                 break;
1734         default:
1735                 device_printf(adapter->dev, "Only auto media type\n");
1736                 return (EINVAL);
1737         }
1738
1739         return (0);
1740 }
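
/*
** In practice only "ifconfig ixN media autoselect" succeeds here;
** any fixed subtype (e.g. 10Gbase-SR) is rejected with EINVAL and
** the "Only auto media type" message above.
*/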
1741
1742 /*********************************************************************
1743  *
1744  *  This routine maps the mbufs to tx descriptors, allowing the
1745  *  TX engine to transmit the packets. 
1746  *      - return 0 on success, positive on failure
1747  *
1748  **********************************************************************/
1749
1750 static int
1751 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1752 {
1753         struct adapter  *adapter = txr->adapter;
1754         u32             olinfo_status = 0, cmd_type_len;
1755         int             i, j, error, nsegs;
1756         int             first;
1757         bool            remap = TRUE;
1758         struct mbuf     *m_head;
1759         bus_dma_segment_t segs[adapter->num_segs];
1760         bus_dmamap_t    map;
1761         struct ixgbe_tx_buf *txbuf;
1762         union ixgbe_adv_tx_desc *txd = NULL;
1763
1764         m_head = *m_headp;
1765
1766         /* Basic descriptor defines */
1767         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1768             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1769
1770         if (m_head->m_flags & M_VLANTAG)
1771                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1772
1773         /*
1774          * Important to capture the first descriptor
1775          * used because it will contain the index of
1776          * the one we tell the hardware to report back
1777          */
1778         first = txr->next_avail_desc;
1779         txbuf = &txr->tx_buffers[first];
1780         map = txbuf->map;
1781
1782         /*
1783          * Map the packet for DMA.
1784          */
1785 retry:
1786         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1787             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1788
1789         if (__predict_false(error)) {
1790                 struct mbuf *m;
1791
1792                 switch (error) {
1793                 case EFBIG:
1794                         /* Try it again? - one try */
1795                         if (remap == TRUE) {
1796                                 remap = FALSE;
1797                                 m = m_defrag(*m_headp, M_NOWAIT);
1798                                 if (m == NULL) {
1799                                         adapter->mbuf_defrag_failed++;
1800                                         m_freem(*m_headp);
1801                                         *m_headp = NULL;
1802                                         return (ENOBUFS);
1803                                 }
1804                                 *m_headp = m;
1805                                 goto retry;
1806                         } else
1807                                 return (error);
1808                 case ENOMEM:
1809                         txr->no_tx_dma_setup++;
1810                         return (error);
1811                 default:
1812                         txr->no_tx_dma_setup++;
1813                         m_freem(*m_headp);
1814                         *m_headp = NULL;
1815                         return (error);
1816                 }
1817         }
1818
1819         /* Make certain there are enough descriptors */
1820         if (nsegs > txr->tx_avail - 2) {
1821                 txr->no_desc_avail++;
1822                 bus_dmamap_unload(txr->txtag, map);
1823                 return (ENOBUFS);
1824         }
1825         m_head = *m_headp;
1826
1827         /*
1828         ** Set up the appropriate offload context
1829         ** this will consume the first descriptor
1830         */
1831         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1832         if (__predict_false(error)) {
1833                 if (error == ENOBUFS)
1834                         *m_headp = NULL;
1835                 return (error);
1836         }
1837
1838 #ifdef IXGBE_FDIR
1839         /* Do the flow director magic */
1840         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1841                 ++txr->atr_count;
1842                 if (txr->atr_count >= atr_sample_rate) {
1843                         ixgbe_atr(txr, m_head);
1844                         txr->atr_count = 0;
1845                 }
1846         }
1847 #endif
1848
1849         i = txr->next_avail_desc;
1850         for (j = 0; j < nsegs; j++) {
1851                 bus_size_t seglen;
1852                 bus_addr_t segaddr;
1853
1854                 txbuf = &txr->tx_buffers[i];
1855                 txd = &txr->tx_base[i];
1856                 seglen = segs[j].ds_len;
1857                 segaddr = htole64(segs[j].ds_addr);
1858
1859                 txd->read.buffer_addr = segaddr;
1860                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1861                     cmd_type_len | seglen);
1862                 txd->read.olinfo_status = htole32(olinfo_status);
1863
1864                 if (++i == txr->num_desc)
1865                         i = 0;
1866         }
1867
1868         txd->read.cmd_type_len |=
1869             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1870         txr->tx_avail -= nsegs;
1871         txr->next_avail_desc = i;
1872
1873         txbuf->m_head = m_head;
1874         /*
1875         ** Here we swap the map so the last descriptor,
1876         ** which gets the completion interrupt, has the
1877         ** real map, and the first descriptor gets the
1878         ** unused map from this descriptor.
1879         */
1880         txr->tx_buffers[first].map = txbuf->map;
1881         txbuf->map = map;
1882         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1883
1884         /* Set the EOP descriptor that will be marked done */
1885         txbuf = &txr->tx_buffers[first];
1886         txbuf->eop = txd;
1887
1888         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1889             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1890         /*
1891          * Advance the Transmit Descriptor Tail (TDT); this tells the
1892          * hardware that this frame is available to transmit.
1893          */
1894         ++txr->total_packets;
1895         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1896
1897         return (0);
1898
1899 }
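
/*
** Sketch of the bookkeeping above for a 3-segment packet that starts
** at ring slot 5: descriptors 5-7 are filled and EOP|RS is set on
** slot 7; the mbuf and the loaded DMA map end up on tx_buffers[7],
** slot 5 inherits the unused spare map, and tx_buffers[5].eop points
** at slot 7's descriptor so completion processing knows where the
** frame ends.
*/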
1900
1901 static void
1902 ixgbe_set_promisc(struct adapter *adapter)
1903 {
1904         u32             reg_rctl;
1905         struct ifnet   *ifp = adapter->ifp;
1906         int             mcnt = 0;
1907
1908         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1909         reg_rctl &= (~IXGBE_FCTRL_UPE);
1910         if (ifp->if_flags & IFF_ALLMULTI)
1911                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1912         else {
1913                 struct  ifmultiaddr *ifma;
1914 #if __FreeBSD_version < 800000
1915                 IF_ADDR_LOCK(ifp);
1916 #else
1917                 if_maddr_rlock(ifp);
1918 #endif
1919                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1920                         if (ifma->ifma_addr->sa_family != AF_LINK)
1921                                 continue;
1922                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1923                                 break;
1924                         mcnt++;
1925                 }
1926 #if __FreeBSD_version < 800000
1927                 IF_ADDR_UNLOCK(ifp);
1928 #else
1929                 if_maddr_runlock(ifp);
1930 #endif
1931         }
1932         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1933                 reg_rctl &= (~IXGBE_FCTRL_MPE);
1934         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1935
1936         if (ifp->if_flags & IFF_PROMISC) {
1937                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1938                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1939         } else if (ifp->if_flags & IFF_ALLMULTI) {
1940                 reg_rctl |= IXGBE_FCTRL_MPE;
1941                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1942                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1943         }
1944         return;
1945 }
1946
1947
1948 /*********************************************************************
1949  *  Multicast Update
1950  *
1951  *  This routine is called whenever multicast address list is updated.
1952  *
1953  **********************************************************************/
1954 #define IXGBE_RAR_ENTRIES 16
1955
1956 static void
1957 ixgbe_set_multi(struct adapter *adapter)
1958 {
1959         u32     fctrl;
1960         u8      *mta;
1961         u8      *update_ptr;
1962         struct  ifmultiaddr *ifma;
1963         int     mcnt = 0;
1964         struct ifnet   *ifp = adapter->ifp;
1965
1966         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1967
1968         mta = adapter->mta;
1969         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1970             MAX_NUM_MULTICAST_ADDRESSES);
1971
1972 #if __FreeBSD_version < 800000
1973         IF_ADDR_LOCK(ifp);
1974 #else
1975         if_maddr_rlock(ifp);
1976 #endif
1977         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1978                 if (ifma->ifma_addr->sa_family != AF_LINK)
1979                         continue;
1980                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1981                         break;
1982                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1983                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1984                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1985                 mcnt++;
1986         }
1987 #if __FreeBSD_version < 800000
1988         IF_ADDR_UNLOCK(ifp);
1989 #else
1990         if_maddr_runlock(ifp);
1991 #endif
1992
1993         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1994         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1995         if (ifp->if_flags & IFF_PROMISC)
1996                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1997         else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
1998             ifp->if_flags & IFF_ALLMULTI) {
1999                 fctrl |= IXGBE_FCTRL_MPE;
2000                 fctrl &= ~IXGBE_FCTRL_UPE;
2001         } else
2002                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2003         
2004         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2005
2006         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2007                 update_ptr = mta;
2008                 ixgbe_update_mc_addr_list(&adapter->hw,
2009                     update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2010         }
2011
2012         return;
2013 }
2014
2015 /*
2016  * This is an iterator function needed by the multicast shared
2017  * code. It feeds that routine the addresses collected in the
2018  * mta array by ixgbe_set_multi(), one at a time.
2019  */
2020 static u8 *
2021 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2022 {
2023         u8 *addr = *update_ptr;
2024         u8 *newptr;
2025         *vmdq = 0;
2026
2027         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2028         *update_ptr = newptr;
2029         return addr;
2030 }
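
/*
** The shared code consumes the iterator roughly like this (a sketch,
** not the actual ixgbe_update_mc_addr_list() body):
**
**      update_ptr = mta;
**      for (i = 0; i < mcnt; i++)
**              program_mta_entry(hw, next(hw, &update_ptr, &vmdq));
**
** where next is the function pointer passed from ixgbe_set_multi().
*/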
2031
2032
2033 /*********************************************************************
2034  *  Timer routine
2035  *
2036  *  This routine checks for link status, updates statistics,
2037  *  and runs the watchdog check.
2038  *
2039  **********************************************************************/
2040
2041 static void
2042 ixgbe_local_timer(void *arg)
2043 {
2044         struct adapter  *adapter = arg;
2045         device_t        dev = adapter->dev;
2046         struct ix_queue *que = adapter->queues;
2047         struct tx_ring  *txr = adapter->tx_rings;
2048         int             hung = 0, paused = 0;
2049
2050         mtx_assert(&adapter->core_mtx, MA_OWNED);
2051
2052         /* Check for pluggable optics */
2053         if (adapter->sfp_probe)
2054                 if (!ixgbe_sfp_probe(adapter))
2055                         goto out; /* Nothing to do */
2056
2057         ixgbe_update_link_status(adapter);
2058         ixgbe_update_stats_counters(adapter);
2059
2060         /*
2061          * If the interface has been paused
2062          * then don't do the watchdog check
2063          */
2064         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2065                 paused = 1;
2066
2067         /*
2068         ** Check the TX queues status
2069         **      - watchdog only if all queues show hung
2070         */          
2071         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2072                 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2073                     (paused == 0))
2074                         ++hung;
2075                 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2076                         taskqueue_enqueue(que->tq, &txr->txq_task);
2077         }
2078         /* Only truly watchdog if all queues show hung */
2079         if (hung == adapter->num_queues)
2080                 goto watchdog;
2081
2082 out:
2083         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2084         return;
2085
2086 watchdog:
2087         device_printf(dev, "Watchdog timeout -- resetting\n");
2088         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2089             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2090             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2091         device_printf(dev, "TX(%d) desc avail = %d, "
2092             "Next TX to Clean = %d\n",
2093             txr->me, txr->tx_avail, txr->next_to_clean);
2094         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2095         adapter->watchdog_events++;
2096         ixgbe_init_locked(adapter);
2097 }
2098
2099 /*
2100 ** Note: this routine updates the OS on the link state;
2101 **      the real check of the hardware only happens with
2102 **      a link interrupt.
2103 */
2104 static void
2105 ixgbe_update_link_status(struct adapter *adapter)
2106 {
2107         struct ifnet    *ifp = adapter->ifp;
2108         device_t dev = adapter->dev;
2109
2110
2111         if (adapter->link_up) {
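                /*
                ** Note: a link_speed of 128 is IXGBE_LINK_SPEED_10GB_FULL
                ** (0x0080); every other speed, including 100Mb, prints
                ** as 1 Gbps in the bootverbose message below.
                */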
2112                 if (adapter->link_active == FALSE) {
2113                         if (bootverbose)
2114                                 device_printf(dev,"Link is up %d Gbps %s\n",
2115                                     ((adapter->link_speed == 128)? 10:1),
2116                                     "Full Duplex");
2117                         adapter->link_active = TRUE;
2118                         /* Update any Flow Control changes */
2119                         ixgbe_fc_enable(&adapter->hw);
2120                         if_link_state_change(ifp, LINK_STATE_UP);
2121                 }
2122         } else { /* Link down */
2123                 if (adapter->link_active == TRUE) {
2124                         if (bootverbose)
2125                                 device_printf(dev,"Link is Down\n");
2126                         if_link_state_change(ifp, LINK_STATE_DOWN);
2127                         adapter->link_active = FALSE;
2128                 }
2129         }
2130
2131         return;
2132 }
2133
2134
2135 /*********************************************************************
2136  *
2137  *  This routine disables all traffic on the adapter by issuing a
2138  *  global reset on the MAC and deallocates TX/RX buffers.
2139  *
2140  **********************************************************************/
2141
2142 static void
2143 ixgbe_stop(void *arg)
2144 {
2145         struct ifnet   *ifp;
2146         struct adapter *adapter = arg;
2147         struct ixgbe_hw *hw = &adapter->hw;
2148         ifp = adapter->ifp;
2149
2150         mtx_assert(&adapter->core_mtx, MA_OWNED);
2151
2152         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2153         ixgbe_disable_intr(adapter);
2154         callout_stop(&adapter->timer);
2155
2156         /* Let the stack know...*/
2157         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2158
2159         ixgbe_reset_hw(hw);
2160         hw->adapter_stopped = FALSE;
2161         ixgbe_stop_adapter(hw);
2162         if (hw->mac.type == ixgbe_mac_82599EB)
2163                 ixgbe_stop_mac_link_on_d3_82599(hw);
2164         /* Turn off the laser - noop with no optics */
2165         ixgbe_disable_tx_laser(hw);
2166
2167         /* Update the stack */
2168         adapter->link_up = FALSE;
2169         ixgbe_update_link_status(adapter);
2170
2171         /* reprogram the RAR[0] in case user changed it. */
2172         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2173
2174         return;
2175 }
2176
2177
2178 /*********************************************************************
2179  *
2180  *  Determine hardware revision.
2181  *
2182  **********************************************************************/
2183 static void
2184 ixgbe_identify_hardware(struct adapter *adapter)
2185 {
2186         device_t        dev = adapter->dev;
2187         struct ixgbe_hw *hw = &adapter->hw;
2188
2189         /* Save off the information about this board */
2190         hw->vendor_id = pci_get_vendor(dev);
2191         hw->device_id = pci_get_device(dev);
2192         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2193         hw->subsystem_vendor_id =
2194             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2195         hw->subsystem_device_id =
2196             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2197
2198         /* We need this here to set the num_segs below */
2199         ixgbe_set_mac_type(hw);
2200
2201         /* Pick up the 82599 and VF settings */
2202         if (hw->mac.type != ixgbe_mac_82598EB) {
2203                 hw->phy.smart_speed = ixgbe_smart_speed;
2204                 adapter->num_segs = IXGBE_82599_SCATTER;
2205         } else
2206                 adapter->num_segs = IXGBE_82598_SCATTER;
2207
2208         return;
2209 }
2210
2211 /*********************************************************************
2212  *
2213  *  Determine optic type
2214  *
2215  **********************************************************************/
2216 static void
2217 ixgbe_setup_optics(struct adapter *adapter)
2218 {
2219         struct ixgbe_hw *hw = &adapter->hw;
2220         int             layer;
2221
2222         layer = ixgbe_get_supported_physical_layer(hw);
2223
2224         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2225                 adapter->optics = IFM_10G_T;
2226                 return;
2227         }
2228
2229         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2230                 adapter->optics = IFM_1000_T;
2231                 return;
2232         }
2233
2234         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2235                 adapter->optics = IFM_1000_SX;
2236                 return;
2237         }
2238
2239         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2240             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2241                 adapter->optics = IFM_10G_LR;
2242                 return;
2243         }
2244
2245         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2246                 adapter->optics = IFM_10G_SR;
2247                 return;
2248         }
2249
2250         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2251                 adapter->optics = IFM_10G_TWINAX;
2252                 return;
2253         }
2254
2255         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2256             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2257                 adapter->optics = IFM_10G_CX4;
2258                 return;
2259         }
2260
2261         /* If we get here just set the default */
2262         adapter->optics = IFM_ETHER | IFM_AUTO;
2263         return;
2264 }
2265
2266 /*********************************************************************
2267  *
2268  *  Setup the Legacy or MSI Interrupt handler
2269  *
2270  **********************************************************************/
2271 static int
2272 ixgbe_allocate_legacy(struct adapter *adapter)
2273 {
2274         device_t        dev = adapter->dev;
2275         struct          ix_queue *que = adapter->queues;
2276 #ifndef IXGBE_LEGACY_TX
2277         struct tx_ring          *txr = adapter->tx_rings;
2278 #endif
2279         int             error, rid = 0;
2280
2281         /* MSI RID at 1 */
2282         if (adapter->msix == 1)
2283                 rid = 1;
2284
2285         /* We allocate a single interrupt resource */
2286         adapter->res = bus_alloc_resource_any(dev,
2287             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2288         if (adapter->res == NULL) {
2289                 device_printf(dev, "Unable to allocate bus resource: "
2290                     "interrupt\n");
2291                 return (ENXIO);
2292         }
2293
2294         /*
2295          * Try allocating a fast interrupt and the associated deferred
2296          * processing contexts.
2297          */
2298 #ifndef IXGBE_LEGACY_TX
2299         TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2300 #endif
2301         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2302         que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2303             taskqueue_thread_enqueue, &que->tq);
2304         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
2305             device_get_nameunit(adapter->dev));
2306
2307         /* Tasklets for Link, SFP and Multispeed Fiber */
2308         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2309         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2310         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2311 #ifdef IXGBE_FDIR
2312         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2313 #endif
2314         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2315             taskqueue_thread_enqueue, &adapter->tq);
2316         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2317             device_get_nameunit(adapter->dev));
2318
2319         if ((error = bus_setup_intr(dev, adapter->res,
2320             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
2321             que, &adapter->tag)) != 0) {
2322                 device_printf(dev, "Failed to register fast interrupt "
2323                     "handler: %d\n", error);
2324                 taskqueue_free(que->tq);
2325                 taskqueue_free(adapter->tq);
2326                 que->tq = NULL;
2327                 adapter->tq = NULL;
2328                 return (error);
2329         }
2330         /* For simplicity in the handlers */
2331         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2332
2333         return (0);
2334 }
2335
2336
2337 /*********************************************************************
2338  *
2339  *  Setup MSIX Interrupt resources and handlers 
2340  *
2341  **********************************************************************/
2342 static int
2343 ixgbe_allocate_msix(struct adapter *adapter)
2344 {
2345         device_t        dev = adapter->dev;
2346         struct          ix_queue *que = adapter->queues;
2347         struct          tx_ring *txr = adapter->tx_rings;
2348         int             error, rid, vector = 0;
2349
2350         for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2351                 rid = vector + 1;
2352                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2353                     RF_SHAREABLE | RF_ACTIVE);
2354                 if (que->res == NULL) {
2355                         device_printf(dev,"Unable to allocate"
2356                             " bus resource: que interrupt [%d]\n", vector);
2357                         return (ENXIO);
2358                 }
2359                 /* Set the handler function */
2360                 error = bus_setup_intr(dev, que->res,
2361                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2362                     ixgbe_msix_que, que, &que->tag);
2363                 if (error) {
2364                         que->res = NULL;
2365                         device_printf(dev, "Failed to register QUE handler\n");
2366                         return (error);
2367                 }
2368 #if __FreeBSD_version >= 800504
2369                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2370 #endif
2371                 que->msix = vector;
2372                 adapter->que_mask |= (u64)1 << que->msix;
2373                 /*
2374                 ** Bind the msix vector, and thus the
2375                 ** ring to the corresponding cpu.
2376                 */
2377                 if (adapter->num_queues > 1)
2378                         bus_bind_intr(dev, que->res, i);
2379
2380 #ifndef IXGBE_LEGACY_TX
2381                 TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2382 #endif
2383                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2384                 que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2385                     taskqueue_thread_enqueue, &que->tq);
2386                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2387                     device_get_nameunit(adapter->dev));
2388         }
2389
2390         /* and Link */
2391         rid = vector + 1;
2392         adapter->res = bus_alloc_resource_any(dev,
2393             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2394         if (!adapter->res) {
2395                 device_printf(dev,"Unable to allocate"
2396                     " bus resource: Link interrupt [%d]\n", rid);
2397                 return (ENXIO);
2398         }
2399         /* Set the link handler function */
2400         error = bus_setup_intr(dev, adapter->res,
2401             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2402             ixgbe_msix_link, adapter, &adapter->tag);
2403         if (error) {
2404                 adapter->res = NULL;
2405                 device_printf(dev, "Failed to register LINK handler\n");
2406                 return (error);
2407         }
2408 #if __FreeBSD_version >= 800504
2409         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2410 #endif
2411         adapter->linkvec = vector;
2412         /* Tasklets for Link, SFP and Multispeed Fiber */
2413         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2414         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2415         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2416 #ifdef IXGBE_FDIR
2417         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2418 #endif
2419         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2420             taskqueue_thread_enqueue, &adapter->tq);
2421         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2422             device_get_nameunit(adapter->dev));
2423
2424         return (0);
2425 }
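
/*
** Resulting layout with, say, 4 queues: queue vectors 0-3 sit on IRQ
** RIDs 1-4 (bound one-to-one to CPUs 0-3 when more than one queue is
** configured), the link vector is 4 on RID 5, and que_mask carries
** one bit per queue vector for the interrupt handlers.
*/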
2426
2427 /*
2428  * Setup Either MSI/X or MSI
2429  */
2430 static int
2431 ixgbe_setup_msix(struct adapter *adapter)
2432 {
2433         device_t dev = adapter->dev;
2434         int rid, want, queues, msgs;
2435
2436         /* Override by tuneable */
2437         if (ixgbe_enable_msix == 0)
2438                 goto msi;
2439
2440         /* First try MSI/X */
2441         msgs = pci_msix_count(dev); 
2442         if (msgs == 0)
2443                 goto msi;
2444         rid = PCIR_BAR(MSIX_82598_BAR);
2445         adapter->msix_mem = bus_alloc_resource_any(dev,
2446             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2447         if (adapter->msix_mem == NULL) {
2448                 rid += 4;       /* 82599 maps in higher BAR */
2449                 adapter->msix_mem = bus_alloc_resource_any(dev,
2450                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2451         }
2452         if (adapter->msix_mem == NULL) {
2453                 /* May not be enabled */
2454                 device_printf(adapter->dev,
2455                     "Unable to map MSIX table\n");
2456                 goto msi;
2457         }
2458
2459         /* Figure out a reasonable auto config value */
2460         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2461
2462         if (ixgbe_num_queues != 0)
2463                 queues = ixgbe_num_queues;
2464         /* Set max queues to 8 when autoconfiguring */
2465         else if (queues > 8)
2466                 queues = 8;
2467
2468         /* reflect correct sysctl value */
2469         ixgbe_num_queues = queues;
2470
2471         /*
2472         ** Want one vector (RX/TX pair) per queue
2473         ** plus an additional for Link.
2474         */
2475         want = queues + 1;
2476         if (msgs >= want)
2477                 msgs = want;
2478         else {
2479                 device_printf(adapter->dev,
2480                     "MSIX Configuration Problem, "
2481                     "%d vectors available but %d wanted!\n",
2482                     msgs, want);
2483                 goto msi;
2484         }
2485         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2486                 device_printf(adapter->dev,
2487                     "Using MSIX interrupts with %d vectors\n", msgs);
2488                 adapter->num_queues = queues;
2489                 return (msgs);
2490         }
2491         /*
2492         ** If MSIX alloc failed or provided us with
2493         ** less than needed, free and fall through to MSI
2494         */
2495         pci_release_msi(dev);
2496
2497 msi:
2498         if (adapter->msix_mem != NULL) {
2499                 bus_release_resource(dev, SYS_RES_MEMORY,
2500                     rid, adapter->msix_mem);
2501                 adapter->msix_mem = NULL;
2502         }
2503         msgs = 1;
2504         if (pci_alloc_msi(dev, &msgs) == 0) {
2505                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2506                 return (msgs);
2507         }
2508         device_printf(adapter->dev,"Using a Legacy interrupt\n");
2509         return (0);
2510 }
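
/*
** Worked example of the sizing above: on an 8-core machine whose
** device exposes 18 MSIX messages, the auto value is queues = 8,
** want = 9 (one vector per queue plus link), and 9 vectors are
** requested; if pci_alloc_msix() cannot deliver exactly 9, the code
** releases them and falls back to MSI, then to a legacy interrupt.
*/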
2511
2512
2513 static int
2514 ixgbe_allocate_pci_resources(struct adapter *adapter)
2515 {
2516         int             rid;
2517         device_t        dev = adapter->dev;
2518
2519         rid = PCIR_BAR(0);
2520         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2521             &rid, RF_ACTIVE);
2522
2523         if (!(adapter->pci_mem)) {
2524                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2525                 return (ENXIO);
2526         }
2527
2528         adapter->osdep.mem_bus_space_tag =
2529                 rman_get_bustag(adapter->pci_mem);
2530         adapter->osdep.mem_bus_space_handle =
2531                 rman_get_bushandle(adapter->pci_mem);
2532         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2533
2534         /* Legacy defaults */
2535         adapter->num_queues = 1;
2536         adapter->hw.back = &adapter->osdep;
2537
2538         /*
2539         ** Now setup MSI or MSI/X, should
2540         ** return us the number of supported
2541         ** vectors. (Will be 1 for MSI)
2542         */
2543         adapter->msix = ixgbe_setup_msix(adapter);
2544         return (0);
2545 }
2546
2547 static void
2548 ixgbe_free_pci_resources(struct adapter * adapter)
2549 {
2550         struct          ix_queue *que = adapter->queues;
2551         device_t        dev = adapter->dev;
2552         int             rid, memrid;
2553
2554         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2555                 memrid = PCIR_BAR(MSIX_82598_BAR);
2556         else
2557                 memrid = PCIR_BAR(MSIX_82599_BAR);
2558
2559         /*
2560         ** There is a slight possibility of a failure mode
2561         ** in attach that will result in entering this function
2562         ** before interrupt resources have been initialized, and
2563         ** in that case we do not want to execute the loops below.
2564         ** We can detect this reliably by the state of the adapter
2565         ** res pointer.
2566         */
2567         if (adapter->res == NULL)
2568                 goto mem;
2569
2570         /*
2571         **  Release all msix queue resources:
2572         */
2573         for (int i = 0; i < adapter->num_queues; i++, que++) {
2574                 rid = que->msix + 1;
2575                 if (que->tag != NULL) {
2576                         bus_teardown_intr(dev, que->res, que->tag);
2577                         que->tag = NULL;
2578                 }
2579                 if (que->res != NULL)
2580                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2581         }
2582
2583
2584         /* Clean the Legacy or Link interrupt last */
2585         if (adapter->linkvec) /* we are doing MSIX */
2586                 rid = adapter->linkvec + 1;
2587         else
2588                 rid = (adapter->msix != 0) ? 1 : 0;
2589
2590         if (adapter->tag != NULL) {
2591                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2592                 adapter->tag = NULL;
2593         }
2594         if (adapter->res != NULL)
2595                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2596
2597 mem:
2598         if (adapter->msix)
2599                 pci_release_msi(dev);
2600
2601         if (adapter->msix_mem != NULL)
2602                 bus_release_resource(dev, SYS_RES_MEMORY,
2603                     memrid, adapter->msix_mem);
2604
2605         if (adapter->pci_mem != NULL)
2606                 bus_release_resource(dev, SYS_RES_MEMORY,
2607                     PCIR_BAR(0), adapter->pci_mem);
2608
2609         return;
2610 }
2611
2612 /*********************************************************************
2613  *
2614  *  Setup networking device structure and register an interface.
2615  *
2616  **********************************************************************/
2617 static int
2618 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2619 {
2620         struct ixgbe_hw *hw = &adapter->hw;
2621         struct ifnet   *ifp;
2622
2623         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2624
2625         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2626         if (ifp == NULL) {
2627                 device_printf(dev, "can not allocate ifnet structure\n");
2628                 return (-1);
2629         }
2630         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2631 #if __FreeBSD_version < 1000025
2632         ifp->if_baudrate = 1000000000;
2633 #else
2634         if_initbaudrate(ifp, IF_Gbps(10));
2635 #endif
2636         ifp->if_init = ixgbe_init;
2637         ifp->if_softc = adapter;
2638         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2639         ifp->if_ioctl = ixgbe_ioctl;
2640 #ifndef IXGBE_LEGACY_TX
2641         ifp->if_transmit = ixgbe_mq_start;
2642         ifp->if_qflush = ixgbe_qflush;
2643 #else
2644         ifp->if_start = ixgbe_start;
2645         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2646         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2647         IFQ_SET_READY(&ifp->if_snd);
2648 #endif
2649
2650         ether_ifattach(ifp, adapter->hw.mac.addr);
2651
2652         adapter->max_frame_size =
2653             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
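        /* e.g. 1500 + 14 + 4 = 1518 bytes with the default MTU */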
2654
2655         /*
2656          * Tell the upper layer(s) we support long frames.
2657          */
2658         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2659
2660         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2661         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2662         ifp->if_capabilities |= IFCAP_LRO;
2663         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2664                              |  IFCAP_VLAN_HWTSO
2665                              |  IFCAP_VLAN_MTU
2666                              |  IFCAP_HWSTATS;
2667         ifp->if_capenable = ifp->if_capabilities;
2668
2669         /*
2670         ** Don't turn this on by default, if vlans are
2671         ** Don't enable this by default: if VLANs are created on
2672         ** another pseudo device (e.g. lagg) then VLAN events are
2673         ** not passed through, breaking operation, but with HW
2674         ** FILTER off it works. If you use VLANs directly on the
2675         ** ixgbe driver you can enable this and get full hardware
2676         ** tag filtering.
2677         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2678
2679         /*
2680          * Specify the media types supported by this adapter and register
2681          * callbacks to update media and link information
2682          */
2683         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2684                      ixgbe_media_status);
2685         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2686         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2687         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2688                 ifmedia_add(&adapter->media,
2689                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2690                 ifmedia_add(&adapter->media,
2691                     IFM_ETHER | IFM_1000_T, 0, NULL);
2692         }
2693         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2694         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2695
2696         return (0);
2697 }
2698
2699 static void
2700 ixgbe_config_link(struct adapter *adapter)
2701 {
2702         struct ixgbe_hw *hw = &adapter->hw;
2703         u32     autoneg, err = 0;
2704         bool    sfp, negotiate;
2705
2706         sfp = ixgbe_is_sfp(hw);
2707
2708         if (sfp) { 
2709                 if (hw->phy.multispeed_fiber) {
2710                         hw->mac.ops.setup_sfp(hw);
2711                         ixgbe_enable_tx_laser(hw);
2712                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2713                 } else
2714                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2715         } else {
2716                 if (hw->mac.ops.check_link)
2717                         err = ixgbe_check_link(hw, &adapter->link_speed,
2718                             &adapter->link_up, FALSE);
2719                 if (err)
2720                         goto out;
2721                 autoneg = hw->phy.autoneg_advertised;
2722                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2723                         err = hw->mac.ops.get_link_capabilities(hw,
2724                             &autoneg, &negotiate);
2725                 if (err)
2726                         goto out;
2727                 if (hw->mac.ops.setup_link)
2728                         err = hw->mac.ops.setup_link(hw,
2729                             autoneg, adapter->link_up);
2730         }
2731 out:
2732         return;
2733 }
2734
2735 /********************************************************************
2736  * Manage DMA'able memory.
2737  *******************************************************************/
2738 static void
2739 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2740 {
2741         if (error)
2742                 return;
2743         *(bus_addr_t *) arg = segs->ds_addr;
2744         return;
2745 }
2746
2747 static int
2748 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2749                 struct ixgbe_dma_alloc *dma, int mapflags)
2750 {
2751         device_t dev = adapter->dev;
2752         int             r;
2753
2754         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2755                                DBA_ALIGN, 0,    /* alignment, bounds */
2756                                BUS_SPACE_MAXADDR,       /* lowaddr */
2757                                BUS_SPACE_MAXADDR,       /* highaddr */
2758                                NULL, NULL,      /* filter, filterarg */
2759                                size,    /* maxsize */
2760                                1,       /* nsegments */
2761                                size,    /* maxsegsize */
2762                                BUS_DMA_ALLOCNOW,        /* flags */
2763                                NULL,    /* lockfunc */
2764                                NULL,    /* lockfuncarg */
2765                                &dma->dma_tag);
2766         if (r != 0) {
2767                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2768                        "error %u\n", r);
2769                 goto fail_0;
2770         }
2771         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2772                              BUS_DMA_NOWAIT, &dma->dma_map);
2773         if (r != 0) {
2774                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2775                        "error %u\n", r);
2776                 goto fail_1;
2777         }
2778         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2779                             size,
2780                             ixgbe_dmamap_cb,
2781                             &dma->dma_paddr,
2782                             mapflags | BUS_DMA_NOWAIT);
2783         if (r != 0) {
2784                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2785                        "error %u\n", r);
2786                 goto fail_2;
2787         }
2788         dma->dma_size = size;
2789         return (0);
2790 fail_2:
2791         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2792 fail_1:
2793         bus_dma_tag_destroy(dma->dma_tag);
2794 fail_0:
2795         dma->dma_map = NULL;
2796         dma->dma_tag = NULL;
2797         return (r);
2798 }
2799
2800 static void
2801 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2802 {
2803         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2804             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2805         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2806         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2807         bus_dma_tag_destroy(dma->dma_tag);
2808 }
2809
2810
2811 /*********************************************************************
2812  *
2813  *  Allocate memory for the transmit and receive rings, and then
2814  *  the descriptors associated with each, called only once at attach.
2815  *
2816  **********************************************************************/
2817 static int
2818 ixgbe_allocate_queues(struct adapter *adapter)
2819 {
2820         device_t        dev = adapter->dev;
2821         struct ix_queue *que;
2822         struct tx_ring  *txr;
2823         struct rx_ring  *rxr;
2824         int rsize, tsize, error = IXGBE_SUCCESS;
2825         int txconf = 0, rxconf = 0;
2826
2827         /* First allocate the top level queue structs */
2828         if (!(adapter->queues =
2829             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2830             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2831                 device_printf(dev, "Unable to allocate queue memory\n");
2832                 error = ENOMEM;
2833                 goto fail;
2834         }
2835
2836         /* First allocate the TX ring struct memory */
2837         if (!(adapter->tx_rings =
2838             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2839             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2840                 device_printf(dev, "Unable to allocate TX ring memory\n");
2841                 error = ENOMEM;
2842                 goto tx_fail;
2843         }
2844
2845         /* Next allocate the RX */
2846         if (!(adapter->rx_rings =
2847             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2848             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2849                 device_printf(dev, "Unable to allocate RX ring memory\n");
2850                 error = ENOMEM;
2851                 goto rx_fail;
2852         }
2853
2854         /* For the ring itself */
2855         tsize = roundup2(adapter->num_tx_desc *
2856             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2857
2858         /*
2859          * Now set up the TX queues; txconf counts how many have been
2860          * configured so that, if anything fails midcourse, the
2861          * allocations can be unwound gracefully.
2862          */
2863         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2864                 /* Set up some basics */
2865                 txr = &adapter->tx_rings[i];
2866                 txr->adapter = adapter;
2867                 txr->me = i;
2868                 txr->num_desc = adapter->num_tx_desc;
2869
2870                 /* Initialize the TX side lock */
2871                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2872                     device_get_nameunit(dev), txr->me);
2873                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2874
2875                 if (ixgbe_dma_malloc(adapter, tsize,
2876                         &txr->txdma, BUS_DMA_NOWAIT)) {
2877                         device_printf(dev,
2878                             "Unable to allocate TX Descriptor memory\n");
2879                         error = ENOMEM;
2880                         goto err_tx_desc;
2881                 }
2882                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2883                 bzero((void *)txr->tx_base, tsize);
2884
2885                 /* Now allocate transmit buffers for the ring */
2886                 if (ixgbe_allocate_transmit_buffers(txr)) {
2887                         device_printf(dev,
2888                             "Critical Failure setting up transmit buffers\n");
2889                         error = ENOMEM;
2890                         goto err_tx_desc;
2891                 }
2892 #ifndef IXGBE_LEGACY_TX
2893                 /* Allocate a buf ring */
2894                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2895                     M_WAITOK, &txr->tx_mtx);
2896                 if (txr->br == NULL) {
2897                         device_printf(dev,
2898                             "Critical Failure setting up buf ring\n");
2899                         error = ENOMEM;
2900                         goto err_tx_desc;
2901                 }
2902 #endif
2903         }
2904
2905         /*
2906          * Next the RX queues...
2907          */ 
2908         rsize = roundup2(adapter->num_rx_desc *
2909             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2910         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2911                 rxr = &adapter->rx_rings[i];
2912                 /* Set up some basics */
2913                 rxr->adapter = adapter;
2914                 rxr->me = i;
2915                 rxr->num_desc = adapter->num_rx_desc;
2916
2917                 /* Initialize the RX side lock */
2918                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2919                     device_get_nameunit(dev), rxr->me);
2920                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2921
2922                 if (ixgbe_dma_malloc(adapter, rsize,
2923                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2924                         device_printf(dev,
2925                             "Unable to allocate RX Descriptor memory\n");
2926                         error = ENOMEM;
2927                         goto err_rx_desc;
2928                 }
2929                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2930                 bzero((void *)rxr->rx_base, rsize);
2931
2932                 /* Allocate receive buffers for the ring */
2933                 if (ixgbe_allocate_receive_buffers(rxr)) {
2934                         device_printf(dev,
2935                             "Critical Failure setting up receive buffers\n");
2936                         error = ENOMEM;
2937                         goto err_rx_desc;
2938                 }
2939         }
2940
2941         /*
2942         ** Finally set up the queue holding structs
2943         */
2944         for (int i = 0; i < adapter->num_queues; i++) {
2945                 que = &adapter->queues[i];
2946                 que->adapter = adapter;
2947                 que->txr = &adapter->tx_rings[i];
2948                 que->rxr = &adapter->rx_rings[i];
2949         }
2950
2951         return (0);
2952
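/*
** Unwind in reverse order of allocation; txconf and rxconf count
** how many rings had their descriptor DMA set up before the failure.
*/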
2953 err_rx_desc:
2954         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2955                 ixgbe_dma_free(adapter, &rxr->rxdma);
2956 err_tx_desc:
2957         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2958                 ixgbe_dma_free(adapter, &txr->txdma);
2959         free(adapter->rx_rings, M_DEVBUF);
2960 rx_fail:
2961         free(adapter->tx_rings, M_DEVBUF);
2962 tx_fail:
2963         free(adapter->queues, M_DEVBUF);
2964 fail:
2965         return (error);
2966 }
2967
2968 /*********************************************************************
2969  *
2970  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2971  *  the information needed to transmit a packet on the wire. This is
2972  *  called only once at attach, setup is done every reset.
2973  *
2974  **********************************************************************/
2975 static int
2976 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2977 {
2978         struct adapter *adapter = txr->adapter;
2979         device_t dev = adapter->dev;
2980         struct ixgbe_tx_buf *txbuf;
2981         int error, i;
2982
2983         /*
2984          * Create the DMA tag used to map transmit mbuf chains.
2985          */
2986         if ((error = bus_dma_tag_create(
2987                                bus_get_dma_tag(adapter->dev),   /* parent */
2988                                1, 0,            /* alignment, bounds */
2989                                BUS_SPACE_MAXADDR,       /* lowaddr */
2990                                BUS_SPACE_MAXADDR,       /* highaddr */
2991                                NULL, NULL,              /* filter, filterarg */
2992                                IXGBE_TSO_SIZE,          /* maxsize */
2993                                adapter->num_segs,       /* nsegments */
2994                                PAGE_SIZE,               /* maxsegsize */
2995                                0,                       /* flags */
2996                                NULL,                    /* lockfunc */
2997                                NULL,                    /* lockfuncarg */
2998                                &txr->txtag))) {
2999                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3000                 goto fail;
3001         }
3002
3003         if (!(txr->tx_buffers =
3004             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3005             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3006                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3007                 error = ENOMEM;
3008                 goto fail;
3009         }
3010
3011         /* Create the descriptor buffer dma maps */
3012         txbuf = txr->tx_buffers;
3013         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3014                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3015                 if (error != 0) {
3016                         device_printf(dev, "Unable to create TX DMA map\n");
3017                         goto fail;
3018                 }
3019         }
3020
3021         return (0);
3022 fail:
3023         /* Free everything; this handles the partially set up case */
3024         ixgbe_free_transmit_structures(adapter);
3025         return (error);
3026 }
3027
3028 /*********************************************************************
3029  *
3030  *  Initialize a transmit ring.
3031  *
3032  **********************************************************************/
3033 static void
3034 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3035 {
3036         struct adapter *adapter = txr->adapter;
3037         struct ixgbe_tx_buf *txbuf;
3038         int i;
3039 #ifdef DEV_NETMAP
3040         struct netmap_adapter *na = NA(adapter->ifp);
3041         struct netmap_slot *slot;
3042 #endif /* DEV_NETMAP */
3043
3044         /* Clear the old ring contents */
3045         IXGBE_TX_LOCK(txr);
3046 #ifdef DEV_NETMAP
3047         /*
3048          * (under lock): if in netmap mode, do some consistency
3049          * checks and set slot to entry 0 of the netmap ring.
3050          */
3051         slot = netmap_reset(na, NR_TX, txr->me, 0);
3052 #endif /* DEV_NETMAP */
3053         bzero((void *)txr->tx_base,
3054               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3055         /* Reset indices */
3056         txr->next_avail_desc = 0;
3057         txr->next_to_clean = 0;
3058
3059         /* Free any existing tx buffers. */
3060         txbuf = txr->tx_buffers;
3061         for (i = 0; i < txr->num_desc; i++, txbuf++) {
3062                 if (txbuf->m_head != NULL) {
3063                         bus_dmamap_sync(txr->txtag, txbuf->map,
3064                             BUS_DMASYNC_POSTWRITE);
3065                         bus_dmamap_unload(txr->txtag, txbuf->map);
3066                         m_freem(txbuf->m_head);
3067                         txbuf->m_head = NULL;
3068                 }
3069 #ifdef DEV_NETMAP
3070                 /*
3071                  * In netmap mode, set the map for the packet buffer.
3072                  * NOTE: Some drivers (not this one) also need to set
3073                  * the physical buffer address in the NIC ring.
3074                  * Slots in the netmap ring (indexed by "si") are
3075                  * kring->nkr_hwofs positions "ahead" wrt the
3076                  * corresponding slot in the NIC ring. In some drivers
3077                  * (not here) nkr_hwofs can be negative. Function
3078                  * netmap_idx_n2k() handles wraparounds properly.
3079                  */
3080                 if (slot) {
3081                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3082                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3083                 }
3084 #endif /* DEV_NETMAP */
3085                 /* Clear the EOP descriptor pointer */
3086                 txbuf->eop = NULL;
3087         }
3088
3089 #ifdef IXGBE_FDIR
3090         /* Set the rate at which we sample packets */
3091         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3092                 txr->atr_sample = atr_sample_rate;
3093 #endif
3094
3095         /* Set number of descriptors available */
3096         txr->tx_avail = adapter->num_tx_desc;
3097
3098         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3099             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3100         IXGBE_TX_UNLOCK(txr);
3101 }
3102
3103 /*********************************************************************
3104  *
3105  *  Initialize all transmit rings.
3106  *
3107  **********************************************************************/
3108 static int
3109 ixgbe_setup_transmit_structures(struct adapter *adapter)
3110 {
3111         struct tx_ring *txr = adapter->tx_rings;
3112
3113         for (int i = 0; i < adapter->num_queues; i++, txr++)
3114                 ixgbe_setup_transmit_ring(txr);
3115
3116         return (0);
3117 }
3118
3119 /*********************************************************************
3120  *
3121  *  Enable transmit unit.
3122  *
3123  **********************************************************************/
3124 static void
3125 ixgbe_initialize_transmit_units(struct adapter *adapter)
3126 {
3127         struct tx_ring  *txr = adapter->tx_rings;
3128         struct ixgbe_hw *hw = &adapter->hw;
3129
3130         /* Setup the Base and Length of the Tx Descriptor Ring */
3131
3132         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3133                 u64     tdba = txr->txdma.dma_paddr;
3134                 u32     txctrl;
3135
3136                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3137                        (tdba & 0x00000000ffffffffULL));
3138                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3139                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3140                     adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3141
3142                 /* Setup the HW Tx Head and Tail descriptor pointers */
3143                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3144                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3145
3146                 /* Setup Transmit Descriptor Cmd Settings */
3147                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3148                 txr->queue_status = IXGBE_QUEUE_IDLE;
3149
3150                 /* Set the processing limit */
3151                 txr->process_limit = ixgbe_tx_process_limit;
3152
3153                 /* Disable relaxed ordering of TX descriptor write-back */
3154                 switch (hw->mac.type) {
3155                 case ixgbe_mac_82598EB:
3156                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3157                         break;
3158                 case ixgbe_mac_82599EB:
3159                 case ixgbe_mac_X540:
3160                 default:
3161                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3162                         break;
3163                 }
3164                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3165                 switch (hw->mac.type) {
3166                 case ixgbe_mac_82598EB:
3167                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3168                         break;
3169                 case ixgbe_mac_82599EB:
3170                 case ixgbe_mac_X540:
3171                 default:
3172                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3173                         break;
3174                 }
3175
3176         }
3177
3178         if (hw->mac.type != ixgbe_mac_82598EB) {
3179                 u32 dmatxctl, rttdcs;
3180                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3181                 dmatxctl |= IXGBE_DMATXCTL_TE;
3182                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3183                 /* Disable arbiter to set MTQC */
3184                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3185                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3186                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
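                /* MTQC_64Q_1PB: 64 TX queues sharing a single packet buffer */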
3187                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3188                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3189                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3190         }
3191
3192         return;
3193 }
3194
3195 /*********************************************************************
3196  *
3197  *  Free all transmit rings.
3198  *
3199  **********************************************************************/
3200 static void
3201 ixgbe_free_transmit_structures(struct adapter *adapter)
3202 {
3203         struct tx_ring *txr = adapter->tx_rings;
3204
3205         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3206                 IXGBE_TX_LOCK(txr);
3207                 ixgbe_free_transmit_buffers(txr);
3208                 ixgbe_dma_free(adapter, &txr->txdma);
3209                 IXGBE_TX_UNLOCK(txr);
3210                 IXGBE_TX_LOCK_DESTROY(txr);
3211         }
3212         free(adapter->tx_rings, M_DEVBUF);
3213 }
3214
3215 /*********************************************************************
3216  *
3217  *  Free transmit ring related data structures.
3218  *
3219  **********************************************************************/
3220 static void
3221 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3222 {
3223         struct adapter *adapter = txr->adapter;
3224         struct ixgbe_tx_buf *tx_buffer;
3225         int             i;
3226
3227         INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
3228
3229         if (txr->tx_buffers == NULL)
3230                 return;
3231
3232         tx_buffer = txr->tx_buffers;
3233         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3234                 if (tx_buffer->m_head != NULL) {
3235                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3236                             BUS_DMASYNC_POSTWRITE);
3237                         bus_dmamap_unload(txr->txtag,
3238                             tx_buffer->map);
3239                         m_freem(tx_buffer->m_head);
3240                         tx_buffer->m_head = NULL;
3241                         if (tx_buffer->map != NULL) {
3242                                 bus_dmamap_destroy(txr->txtag,
3243                                     tx_buffer->map);
3244                                 tx_buffer->map = NULL;
3245                         }
3246                 } else if (tx_buffer->map != NULL) {
3247                         bus_dmamap_unload(txr->txtag,
3248                             tx_buffer->map);
3249                         bus_dmamap_destroy(txr->txtag,
3250                             tx_buffer->map);
3251                         tx_buffer->map = NULL;
3252                 }
3253         }
3254 #ifndef IXGBE_LEGACY_TX
3255         if (txr->br != NULL)
3256                 buf_ring_free(txr->br, M_DEVBUF);
3257 #endif
3258         if (txr->tx_buffers != NULL) {
3259                 free(txr->tx_buffers, M_DEVBUF);
3260                 txr->tx_buffers = NULL;
3261         }
3262         if (txr->txtag != NULL) {
3263                 bus_dma_tag_destroy(txr->txtag);
3264                 txr->txtag = NULL;
3265         }
3266         return;
3267 }
3268
3269 /*********************************************************************
3270  *
3271  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3272  *
3273  **********************************************************************/
3274
3275 static int
3276 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3277     u32 *cmd_type_len, u32 *olinfo_status)
3278 {
3279         struct ixgbe_adv_tx_context_desc *TXD;
3280         struct ether_vlan_header *eh;
3281         struct ip *ip;
3282         struct ip6_hdr *ip6;
3283         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3284         int     ehdrlen, ip_hlen = 0;
3285         u16     etype;
3286         u8      ipproto = 0;
3287         int     offload = TRUE;
3288         int     ctxd = txr->next_avail_desc;
3289         u16     vtag = 0;
3290
3291         /* First check if TSO is to be used */
3292         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3293                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3294
3295         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3296                 offload = FALSE;
3297
3298         /* Indicate the whole packet as payload when not doing TSO */
3299         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3300
3301         /* Now ready a context descriptor */
3302         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3303
3304         /*
3305         ** In advanced descriptors the vlan tag must 
3306         ** be placed into the context descriptor. Hence
3307         ** we need to make one even if not doing offloads.
3308         */
3309         if (mp->m_flags & M_VLANTAG) {
3310                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3311                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3312         } else if (offload == FALSE) /* ... no offload to do */
3313                 return (0);
3314
3315         /*
3316          * Determine where frame payload starts.
3317          * Jump over vlan headers if already present,
3318          * helpful for QinQ too.
3319          */
3320         eh = mtod(mp, struct ether_vlan_header *);
3321         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3322                 etype = ntohs(eh->evl_proto);
3323                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3324         } else {
3325                 etype = ntohs(eh->evl_encap_proto);
3326                 ehdrlen = ETHER_HDR_LEN;
3327         }
3328
3329         /* Set the ether header length */
3330         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3331
3332         switch (etype) {
3333                 case ETHERTYPE_IP:
3334                         ip = (struct ip *)(mp->m_data + ehdrlen);
3335                         ip_hlen = ip->ip_hl << 2;
3336                         ipproto = ip->ip_p;
3337                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3338                         break;
3339                 case ETHERTYPE_IPV6:
3340                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3341                         ip_hlen = sizeof(struct ip6_hdr);
3342                         /* XXX-BZ this will go badly in case of ext hdrs. */
3343                         ipproto = ip6->ip6_nxt;
3344                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3345                         break;
3346                 default:
3347                         offload = FALSE;
3348                         break;
3349         }
3350
3351         vlan_macip_lens |= ip_hlen;
3352         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3353
3354         switch (ipproto) {
3355                 case IPPROTO_TCP:
3356                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3357                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3358                         break;
3359
3360                 case IPPROTO_UDP:
3361                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3362                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3363                         break;
3364
3365 #if __FreeBSD_version >= 800000
3366                 case IPPROTO_SCTP:
3367                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3368                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3369                         break;
3370 #endif
3371                 default:
3372                         offload = FALSE;
3373                         break;
3374         }
3375
3376         if (offload) /* For the TX descriptor setup */
3377                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3378
3379         /* Now copy bits into descriptor */
3380         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3381         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3382         TXD->seqnum_seed = htole32(0);
3383         TXD->mss_l4len_idx = htole32(0);
3384
3385         /* We've consumed the first desc, adjust counters */
3386         if (++ctxd == txr->num_desc)
3387                 ctxd = 0;
3388         txr->next_avail_desc = ctxd;
3389         --txr->tx_avail;
3390
3391         return (0);
3392 }
3393
3394 /**********************************************************************
3395  *
3396  *  Setup work for hardware segmentation offload (TSO) on
3397  *  adapters using advanced tx descriptors
3398  *
3399  **********************************************************************/
3400 static int
3401 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3402     u32 *cmd_type_len, u32 *olinfo_status)
3403 {
3404         struct ixgbe_adv_tx_context_desc *TXD;
3405         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3406         u32 mss_l4len_idx = 0, paylen;
3407         u16 vtag = 0, eh_type;
3408         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3409         struct ether_vlan_header *eh;
3410 #ifdef INET6
3411         struct ip6_hdr *ip6;
3412 #endif
3413 #ifdef INET
3414         struct ip *ip;
3415 #endif
3416         struct tcphdr *th;
3417
3418
3419         /*
3420          * Determine where frame payload starts.
3421          * Jump over vlan headers if already present
3422          */
3423         eh = mtod(mp, struct ether_vlan_header *);
3424         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3425                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3426                 eh_type = eh->evl_proto;
3427         } else {
3428                 ehdrlen = ETHER_HDR_LEN;
3429                 eh_type = eh->evl_encap_proto;
3430         }
3431
3432         switch (ntohs(eh_type)) {
3433 #ifdef INET6
3434         case ETHERTYPE_IPV6:
3435                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3436                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3437                 if (ip6->ip6_nxt != IPPROTO_TCP)
3438                         return (ENXIO);
3439                 ip_hlen = sizeof(struct ip6_hdr);
3441                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3442                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3443                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3444                 break;
3445 #endif
3446 #ifdef INET
3447         case ETHERTYPE_IP:
3448                 ip = (struct ip *)(mp->m_data + ehdrlen);
3449                 if (ip->ip_p != IPPROTO_TCP)
3450                         return (ENXIO);
3451                 ip->ip_sum = 0;
3452                 ip_hlen = ip->ip_hl << 2;
3453                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3454                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3455                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3456                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3457                 /* Tell transmit desc to also do IPv4 checksum. */
3458                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3459                 break;
3460 #endif
3461         default:
3462                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3463                     __func__, ntohs(eh_type));
3464                 break;
3465         }
3466
3467         ctxd = txr->next_avail_desc;
3468         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3469
3470         tcp_hlen = th->th_off << 2;
3471
3472         /* This is used in the transmit desc in encap */
3473         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3474
3475         /* VLAN MACLEN IPLEN */
3476         if (mp->m_flags & M_VLANTAG) {
3477                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3478                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3479         }
3480
3481         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3482         vlan_macip_lens |= ip_hlen;
3483         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3484
3485         /* ADV DTYPE TUCMD */
3486         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3487         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3488         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3489
3490         /* MSS L4LEN IDX */
3491         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3492         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3493         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3494
3495         TXD->seqnum_seed = htole32(0);
3496
3497         if (++ctxd == txr->num_desc)
3498                 ctxd = 0;
3499
3500         txr->tx_avail--;
3501         txr->next_avail_desc = ctxd;
3502         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3503         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3504         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3505         ++txr->tso_tx;
3506         return (0);
3507 }
3508
3509 #ifdef IXGBE_FDIR
3510 /*
3511 ** This routine parses packet headers so that Flow
3512 ** Director can make a hashed filter table entry,
3513 ** allowing traffic flows to be identified and kept
3514 ** on the same cpu. Doing this for every packet would
3515 ** be a performance hit, so we only sample one of every
3516 ** IXGBE_FDIR_RATE packets.
3517 */
3518 static void
3519 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3520 {
3521         struct adapter                  *adapter = txr->adapter;
3522         struct ix_queue                 *que;
3523         struct ip                       *ip;
3524         struct tcphdr                   *th;
3525         struct udphdr                   *uh;
3526         struct ether_vlan_header        *eh;
3527         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3528         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3529         int                             ehdrlen, ip_hlen;
3530         u16                             etype;
3531
3532         eh = mtod(mp, struct ether_vlan_header *);
3533         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3534                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3535                 etype = eh->evl_proto;
3536         } else {
3537                 ehdrlen = ETHER_HDR_LEN;
3538                 etype = eh->evl_encap_proto;
3539         }
3540
3541         /* Only handling IPv4 */
3542         if (etype != htons(ETHERTYPE_IP))
3543                 return;
3544
3545         ip = (struct ip *)(mp->m_data + ehdrlen);
3546         ip_hlen = ip->ip_hl << 2;
3547
3548         /* check if we're UDP or TCP */
3549         switch (ip->ip_p) {
3550         case IPPROTO_TCP:
3551                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3552                 /* src and dst are inverted */
3553                 common.port.dst ^= th->th_sport;
3554                 common.port.src ^= th->th_dport;
3555                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3556                 break;
3557         case IPPROTO_UDP:
3558                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3559                 /* src and dst are inverted */
3560                 common.port.dst ^= uh->uh_sport;
3561                 common.port.src ^= uh->uh_dport;
3562                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3563                 break;
3564         default:
3565                 return;
3566         }
3567
3568         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3569         if (mp->m_pkthdr.ether_vtag)
3570                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3571         else
3572                 common.flex_bytes ^= etype;
3573         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3574
3575         que = &adapter->queues[txr->me];
3576         /*
3577         ** This assumes the Rx queue and Tx
3578         ** queue are bound to the same CPU
3579         */
3580         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3581             input, common, que->msix);
3582 }
3583 #endif /* IXGBE_FDIR */
3584
3585 /**********************************************************************
3586  *
3587  *  Examine each tx_buffer in the used queue. If the hardware is done
3588  *  processing the packet then free associated resources. The
3589  *  tx_buffer is put back on the free queue.
3590  *
3591  **********************************************************************/
3592 static void
3593 ixgbe_txeof(struct tx_ring *txr)
3594 {
3595         struct adapter          *adapter = txr->adapter;
3596         struct ifnet            *ifp = adapter->ifp;
3597         u32                     work, processed = 0;
3598         u16                     limit = txr->process_limit;
3599         struct ixgbe_tx_buf     *buf;
3600         union ixgbe_adv_tx_desc *txd;
3601
3602         mtx_assert(&txr->tx_mtx, MA_OWNED);
3603
3604 #ifdef DEV_NETMAP
3605         if (ifp->if_capenable & IFCAP_NETMAP) {
3606                 struct netmap_adapter *na = NA(ifp);
3607                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3608                 txd = txr->tx_base;
3609                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3610                     BUS_DMASYNC_POSTREAD);
3611                 /*
3612                  * In netmap mode, all the work is done in the context
3613                  * of the client thread. Interrupt handlers only wake up
3614                  * clients, which may be sleeping on individual rings
3615                  * or on a global resource for all rings.
3616                  * To implement tx interrupt mitigation, we wake up the client
3617                  * thread roughly every half ring, even if the NIC interrupts
3618                  * more frequently. This is implemented as follows:
3619                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3620                  *   the slot that should wake up the thread (nkr_num_slots
3621                  *   means the user thread should not be woken up);
3622                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3623                  *   or the slot has the DD bit set.
3624                  *
3625                  * When the driver has separate locks, we need to
3626                  * release and re-acquire txlock to avoid deadlocks.
3627                  * XXX see if we can find a better way.
3628                  */
3629                 if (!netmap_mitigate ||
3630                     (kring->nr_kflags < kring->nkr_num_slots &&
3631                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3632                         netmap_tx_irq(ifp, txr->me |
3633                             (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
3634                 }
3635                 return;
3636         }
3637 #endif /* DEV_NETMAP */
3638
3639         if (txr->tx_avail == txr->num_desc) {
3640                 txr->queue_status = IXGBE_QUEUE_IDLE;
3641                 return;
3642         }
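        /*
        ** Walk the ring from next_to_clean, freeing packets whose
        ** EOP descriptor the hardware has written back with the
        ** DD (descriptor done) bit set.
        */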
3643
3644         /* Get work starting point */
3645         work = txr->next_to_clean;
3646         buf = &txr->tx_buffers[work];
3647         txd = &txr->tx_base[work];
3648         work -= txr->num_desc; /* Bias negative, so hitting 0 means a wrap */
3649         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3650             BUS_DMASYNC_POSTREAD);
3651
3652         do {
3653                 union ixgbe_adv_tx_desc *eop = buf->eop;
3654                 if (eop == NULL) /* No work */
3655                         break;
3656
3657                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3658                         break;  /* I/O not complete */
3659
3660                 if (buf->m_head) {
3661                         txr->bytes +=
3662                             buf->m_head->m_pkthdr.len;
3663                         bus_dmamap_sync(txr->txtag,
3664                             buf->map,
3665                             BUS_DMASYNC_POSTWRITE);
3666                         bus_dmamap_unload(txr->txtag,
3667                             buf->map);
3668                         m_freem(buf->m_head);
3669                         buf->m_head = NULL;
3670                         buf->map = NULL;
3671                 }
3672                 buf->eop = NULL;
3673                 ++txr->tx_avail;
3674
3675                 /* We clean the range if multi segment */
3676                 while (txd != eop) {
3677                         ++txd;
3678                         ++buf;
3679                         ++work;
3680                         /* wrap the ring? */
3681                         if (__predict_false(!work)) {
3682                                 work -= txr->num_desc;
3683                                 buf = txr->tx_buffers;
3684                                 txd = txr->tx_base;
3685                         }
3686                         if (buf->m_head) {
3687                                 txr->bytes +=
3688                                     buf->m_head->m_pkthdr.len;
3689                                 bus_dmamap_sync(txr->txtag,
3690                                     buf->map,
3691                                     BUS_DMASYNC_POSTWRITE);
3692                                 bus_dmamap_unload(txr->txtag,
3693                                     buf->map);
3694                                 m_freem(buf->m_head);
3695                                 buf->m_head = NULL;
3696                                 buf->map = NULL;
3697                         }
3698                         ++txr->tx_avail;
3699                         buf->eop = NULL;
3700
3701                 }
3702                 ++txr->packets;
3703                 ++processed;
3704                 ++ifp->if_opackets;
3705                 txr->watchdog_time = ticks;
3706
3707                 /* Try the next packet */
3708                 ++txd;
3709                 ++buf;
3710                 ++work;
3711                 /* reset with a wrap */
3712                 if (__predict_false(!work)) {
3713                         work -= txr->num_desc;
3714                         buf = txr->tx_buffers;
3715                         txd = txr->tx_base;
3716                 }
3717                 prefetch(txd);
3718         } while (__predict_true(--limit));
3719
3720         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3721             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3722
3723         work += txr->num_desc;
3724         txr->next_to_clean = work;
3725
3726         /*
3727         ** Watchdog calculation: we know there's
3728         ** work outstanding or the early return above
3729         ** would have been taken, so nothing processed
3730         ** for too long indicates a hang.
3731         */
3732         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3733                 txr->queue_status = IXGBE_QUEUE_HUNG;
3734
3735         if (txr->tx_avail == txr->num_desc)
3736                 txr->queue_status = IXGBE_QUEUE_IDLE;
3737
3738         return;
3739 }
3740
3741 /*********************************************************************
3742  *
3743  *  Refresh mbuf buffers for RX descriptor rings
3744  *   - now keeps its own state, so discards due to resource
3745  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3746  *     it just returns, keeping its placeholder, and can simply
3747  *     be called again later to retry.
3748  *
3749  **********************************************************************/
3750 static void
3751 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3752 {
3753         struct adapter          *adapter = rxr->adapter;
3754         bus_dma_segment_t       seg[1];
3755         struct ixgbe_rx_buf     *rxbuf;
3756         struct mbuf             *mp;
3757         int                     i, j, nsegs, error;
3758         bool                    refreshed = FALSE;
3759
3760         i = j = rxr->next_to_refresh;
3761         /* j runs one slot ahead of i; the loop stops when j reaches limit */
3762         if (++j == rxr->num_desc)
3763                 j = 0;
3764
3765         while (j != limit) {
3766                 rxbuf = &rxr->rx_buffers[i];
3767                 if (rxbuf->buf == NULL) {
3768                         mp = m_getjcl(M_NOWAIT, MT_DATA,
3769                             M_PKTHDR, rxr->mbuf_sz);
3770                         if (mp == NULL)
3771                                 goto update;
3772                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3773                                 m_adj(mp, ETHER_ALIGN);
3774                 } else
3775                         mp = rxbuf->buf;
3776
3777                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
3778
3779                 /* If we're dealing with an mbuf that was copied rather
3780                  * than replaced, there's no need to go through busdma.
3781                  */
3782                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
3783                         /* Get the memory mapping */
3784                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3785                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
3786                         if (error != 0) {
3787                                 printf("Refresh mbufs: payload dmamap load"
3788                                     " failure - %d\n", error);
3789                                 m_free(mp);
3790                                 rxbuf->buf = NULL;
3791                                 goto update;
3792                         }
3793                         rxbuf->buf = mp;
3794                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3795                             BUS_DMASYNC_PREREAD);
3796                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
3797                             htole64(seg[0].ds_addr);
3798                 } else {
3799                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
3800                         rxbuf->flags &= ~IXGBE_RX_COPY;
3801                 }
3802
3803                 refreshed = TRUE;
3804                 /* Next is precalculated */
3805                 i = j;
3806                 rxr->next_to_refresh = i;
3807                 if (++j == rxr->num_desc)
3808                         j = 0;
3809         }
3810 update:
3811         if (refreshed) /* Update hardware tail index */
3812                 IXGBE_WRITE_REG(&adapter->hw,
3813                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3814         return;
3815 }
3816
3817 /*********************************************************************
3818  *
3819  *  Allocate memory for rx_buffer structures. Since we use one
3820  *  rx_buffer per received packet, the maximum number of rx_buffer's
3821  *  that we'll need is equal to the number of receive descriptors
3822  *  that we've allocated.
3823  *
3824  **********************************************************************/
3825 static int
3826 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3827 {
3828         struct  adapter         *adapter = rxr->adapter;
3829         device_t                dev = adapter->dev;
3830         struct ixgbe_rx_buf     *rxbuf;
3831         int                     i, bsize, error;
3832
3833         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
3834         if (!(rxr->rx_buffers =
3835             (struct ixgbe_rx_buf *) malloc(bsize,
3836             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3837                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3838                 error = ENOMEM;
3839                 goto fail;
3840         }
3841
3842         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
3843                                    1, 0,        /* alignment, bounds */
3844                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3845                                    BUS_SPACE_MAXADDR,   /* highaddr */
3846                                    NULL, NULL,          /* filter, filterarg */
3847                                    MJUM16BYTES,         /* maxsize */
3848                                    1,                   /* nsegments */
3849                                    MJUM16BYTES,         /* maxsegsize */
3850                                    0,                   /* flags */
3851                                    NULL,                /* lockfunc */
3852                                    NULL,                /* lockfuncarg */
3853                                    &rxr->ptag))) {
3854                 device_printf(dev, "Unable to create RX DMA tag\n");
3855                 goto fail;
3856         }
3857
3858         for (i = 0; i < rxr->num_desc; i++) {
3859                 rxbuf = &rxr->rx_buffers[i];
3860                 error = bus_dmamap_create(rxr->ptag,
3861                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3862                 if (error) {
3863                         device_printf(dev, "Unable to create RX dma map\n");
3864                         goto fail;
3865                 }
3866         }
3867
3868         return (0);
3869
3870 fail:
3871         /* Frees all, but can handle partial completion */
3872         ixgbe_free_receive_structures(adapter);
3873         return (error);
3874 }
3875
3876 /*
3877 ** Used to detect a descriptor that has
3878 ** been merged by Hardware RSC.
3879 */
3880 static inline u32
3881 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3882 {
3883         return (le32toh(rx->wb.lower.lo_dword.data) &
3884             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3885 }
3886
3887 /*********************************************************************
3888  *
3889  *  Initialize Hardware RSC (LRO) feature on 82599
3890  *  for an RX ring; it is toggled by the LRO capability
3891  *  even though it is transparent to the stack.
3892  *
3893  *  NOTE: since this HW feature only works with IPv4 and
3894  *        our testing has shown soft LRO to be as effective,
3895  *        I have decided to disable this by default.
3896  *
3897  **********************************************************************/
3898 static void
3899 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3900 {
3901         struct  adapter         *adapter = rxr->adapter;
3902         struct  ixgbe_hw        *hw = &adapter->hw;
3903         u32                     rscctrl, rdrxctl;
3904
3905         /* If turning LRO/RSC off we need to disable it */
3906         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
3907                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3908                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                /* Write the cleared enable bit back and mark RSC off */
                IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
                rxr->hw_rsc = FALSE;
3909                 return;
3910         }
3911
3912         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3913         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3914 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3915         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3916 #endif /* DEV_NETMAP */
3917         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3918         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3919         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3920
3921         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3922         rscctrl |= IXGBE_RSCCTL_RSCEN;
3923         /*
3924         ** Limit the total number of descriptors that
3925         ** can be combined, so it does not exceed 64K
3926         */
3927         if (rxr->mbuf_sz == MCLBYTES)
3928                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3929         else if (rxr->mbuf_sz == MJUMPAGESIZE)
3930                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3931         else if (rxr->mbuf_sz == MJUM9BYTES)
3932                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3933         else  /* Using 16K cluster */
3934                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3935
3936         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3937
3938         /* Enable TCP header recognition */
3939         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3940             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3941             IXGBE_PSRTYPE_TCPHDR));
3942
3943         /* Disable RSC for ACK packets */
3944         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3945             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3946
3947         rxr->hw_rsc = TRUE;
3948 }
3949
3950
3951 static void
3952 ixgbe_free_receive_ring(struct rx_ring *rxr)
3953 {
3954         struct ixgbe_rx_buf     *rxbuf;
3955         int i;
3956
3957         for (i = 0; i < rxr->num_desc; i++) {
3958                 rxbuf = &rxr->rx_buffers[i];
3959                 if (rxbuf->buf != NULL) {
3960                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3961                             BUS_DMASYNC_POSTREAD);
3962                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3963                         rxbuf->buf->m_flags |= M_PKTHDR;
3964                         m_freem(rxbuf->buf);
3965                         rxbuf->buf = NULL;
3966                         rxbuf->flags = 0;
3967                 }
3968         }
3969 }
3970
3971
3972 /*********************************************************************
3973  *
3974  *  Initialize a receive ring and its buffers.
3975  *
3976  **********************************************************************/
3977 static int
3978 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3979 {
3980         struct  adapter         *adapter;
3981         struct ifnet            *ifp;
3982         device_t                dev;
3983         struct ixgbe_rx_buf     *rxbuf;
3984         bus_dma_segment_t       seg[1];
3985         struct lro_ctrl         *lro = &rxr->lro;
3986         int                     rsize, nsegs, error = 0;
3987 #ifdef DEV_NETMAP
3988         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3989         struct netmap_slot *slot;
3990 #endif /* DEV_NETMAP */
3991
3992         adapter = rxr->adapter;
3993         ifp = adapter->ifp;
3994         dev = adapter->dev;
3995
3996         /* Clear the ring contents */
3997         IXGBE_RX_LOCK(rxr);
3998 #ifdef DEV_NETMAP
3999         /* same as in ixgbe_setup_transmit_ring() */
4000         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4001 #endif /* DEV_NETMAP */
4002         rsize = roundup2(adapter->num_rx_desc *
4003             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4004         bzero((void *)rxr->rx_base, rsize);
4005         /* Cache the size */
4006         rxr->mbuf_sz = adapter->rx_mbuf_sz;
4007
4008         /* Free current RX buffer structs and their mbufs */
4009         ixgbe_free_receive_ring(rxr);
4010
4011         /* Now replenish the mbufs */
4012         for (int j = 0; j != rxr->num_desc; ++j) {
4013                 struct mbuf     *mp;
4014
4015                 rxbuf = &rxr->rx_buffers[j];
4016 #ifdef DEV_NETMAP
4017                 /*
4018                  * In netmap mode, fill the map and set the buffer
4019                  * address in the NIC ring, considering the offset
4020                  * between the netmap and NIC rings (see comment in
4021                  * ixgbe_setup_transmit_ring() ). No need to allocate
4022                  * an mbuf, so end the block with a continue;
4023                  */
4024                 if (slot) {
4025                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4026                         uint64_t paddr;
4027                         void *addr;
4028
4029                         addr = PNMB(slot + sj, &paddr);
4030                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4031                         /* Update descriptor and the cached value */
4032                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4033                         rxbuf->addr = htole64(paddr);
4034                         continue;
4035                 }
4036 #endif /* DEV_NETMAP */
4037                 rxbuf->flags = 0; 
4038                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
4039                     M_PKTHDR, adapter->rx_mbuf_sz);
4040                 if (rxbuf->buf == NULL) {
4041                         error = ENOBUFS;
4042                         goto fail;
4043                 }
4044                 mp = rxbuf->buf;
4045                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4046                 /* Get the memory mapping */
4047                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4048                     rxbuf->pmap, mp, seg,
4049                     &nsegs, BUS_DMA_NOWAIT);
4050                 if (error != 0)
4051                         goto fail;
4052                 bus_dmamap_sync(rxr->ptag,
4053                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4054                 /* Update the descriptor and the cached value */
4055                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
4056                 rxbuf->addr = htole64(seg[0].ds_addr);
4057         }
4058
4059
4060         /* Setup our descriptor indices */
4061         rxr->next_to_check = 0;
4062         rxr->next_to_refresh = 0;
4063         rxr->lro_enabled = FALSE;
4064         rxr->rx_copies = 0;
4065         rxr->rx_bytes = 0;
4066         rxr->discard = FALSE;
4067         rxr->vtag_strip = FALSE;
4068
4069         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4070             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4071
4072         /*
4073         ** Now set up the LRO interface:
4074         */
4075         if (ixgbe_rsc_enable)
4076                 ixgbe_setup_hw_rsc(rxr);
4077         else if (ifp->if_capenable & IFCAP_LRO) {
4078                 int err = tcp_lro_init(lro);
4079                 if (err) {
4080                         device_printf(dev, "LRO Initialization failed!\n");
4081                         goto fail;
4082                 }
4083                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4084                 rxr->lro_enabled = TRUE;
4085                 lro->ifp = adapter->ifp;
4086         }
4087
4088         IXGBE_RX_UNLOCK(rxr);
4089         return (0);
4090
4091 fail:
4092         ixgbe_free_receive_ring(rxr);
4093         IXGBE_RX_UNLOCK(rxr);
4094         return (error);
4095 }
4096
4097 /*********************************************************************
4098  *
4099  *  Initialize all receive rings.
4100  *
4101  **********************************************************************/
4102 static int
4103 ixgbe_setup_receive_structures(struct adapter *adapter)
4104 {
4105         struct rx_ring *rxr = adapter->rx_rings;
4106         int j;
4107
4108         for (j = 0; j < adapter->num_queues; j++, rxr++)
4109                 if (ixgbe_setup_receive_ring(rxr))
4110                         goto fail;
4111
4112         return (0);
4113 fail:
4114         /*
4115          * Free RX buffers allocated so far; we only handle
4116          * the rings that completed, since the failing ring
4117          * cleaned up after itself. Ring 'j' failed, so it is the terminus.
4118          */
4119         for (int i = 0; i < j; ++i) {
4120                 rxr = &adapter->rx_rings[i];
4121                 ixgbe_free_receive_ring(rxr);
4122         }
4123
4124         return (ENOBUFS);
4125 }
4126
4127 /*********************************************************************
4128  *
4129  *  Setup receive registers and features.
4130  *
4131  **********************************************************************/
4132 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4133
4134 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
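/*
** SRRCTL.BSIZEPKT is expressed in 1KB units; BSIZEPKT_ROUNDUP rounds
** the receive buffer size up to the next unit before the shift below.
*/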
4135         
4136 static void
4137 ixgbe_initialize_receive_units(struct adapter *adapter)
4138 {
4139         struct  rx_ring *rxr = adapter->rx_rings;
4140         struct ixgbe_hw *hw = &adapter->hw;
4141         struct ifnet   *ifp = adapter->ifp;
4142         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4143         u32             reta, mrqc = 0, hlreg, random[10];
4144
4145
4146         /*
4147          * Make sure receives are disabled while
4148          * setting up the descriptor ring
4149          */
4150         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4151         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4152             rxctrl & ~IXGBE_RXCTRL_RXEN);
4153
4154         /* Accept broadcasts; discard pause frames; pass MAC control frames */
4155         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4156         fctrl |= IXGBE_FCTRL_BAM;
4157         fctrl |= IXGBE_FCTRL_DPF;
4158         fctrl |= IXGBE_FCTRL_PMCF;
4159         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4160
4161         /* Set for Jumbo Frames? */
4162         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4163         if (ifp->if_mtu > ETHERMTU)
4164                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4165         else
4166                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4167 #ifdef DEV_NETMAP
4168         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4169         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4170                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4171         else
4172                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4173 #endif /* DEV_NETMAP */
4174         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4175
4176         bufsz = (adapter->rx_mbuf_sz +
4177             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4178
4179         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4180                 u64 rdba = rxr->rxdma.dma_paddr;
4181
4182                 /* Setup the Base and Length of the Rx Descriptor Ring */
4183                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4184                                (rdba & 0x00000000ffffffffULL));
4185                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4186                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4187                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4188
4189                 /* Set up the SRRCTL register */
4190                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4191                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4192                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4193                 srrctl |= bufsz;
4194                 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4195                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4196
4197                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4198                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4199                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4200
4201                 /* Set the processing limit */
4202                 rxr->process_limit = ixgbe_rx_process_limit;
4203         }
4204
4205         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4206                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4207                               IXGBE_PSRTYPE_UDPHDR |
4208                               IXGBE_PSRTYPE_IPV4HDR |
4209                               IXGBE_PSRTYPE_IPV6HDR;
4210                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4211         }
4212
4213         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4214
4215         /* Setup RSS */
4216         if (adapter->num_queues > 1) {
4217                 int i, j;
4218                 reta = 0;
4219
4220                 /* set up random bits */
4221                 arc4rand(&random, sizeof(random), 0);
4222
4223                 /* Set up the redirection table */
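                /*
                ** The 128 one-byte entries are packed four per 32-bit
                ** RETA register, so a register is written on every 4th
                ** iteration.  Multiplying by 0x11 presumably replicates
                ** the queue index into both nibbles of each entry; e.g.
                ** with 4 queues the bytes cycle 0x00, 0x11, 0x22, 0x33.
                */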
4224                 for (i = 0, j = 0; i < 128; i++, j++) {
4225                         if (j == adapter->num_queues) j = 0;
4226                         reta = (reta << 8) | (j * 0x11);
4227                         if ((i & 3) == 3)
4228                                 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4229                 }
4230
4231                 /* Now fill our hash function seeds */
4232                 for (int i = 0; i < 10; i++)
4233                         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4234
4235                 /* Perform hash on these packet types */
4236                 mrqc = IXGBE_MRQC_RSSEN
4237                      | IXGBE_MRQC_RSS_FIELD_IPV4
4238                      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4239                      | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4240                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4241                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4242                      | IXGBE_MRQC_RSS_FIELD_IPV6
4243                      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4244                      | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4245                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4246                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4247
4248                 /* RSS and RX IPP Checksum are mutually exclusive */
4249                 rxcsum |= IXGBE_RXCSUM_PCSD;
4250         }
4251
4252         if (ifp->if_capenable & IFCAP_RXCSUM)
4253                 rxcsum |= IXGBE_RXCSUM_PCSD;
4254
4255         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4256                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4257
4258         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4259
4260         return;
4261 }
4262
4263 /*********************************************************************
4264  *
4265  *  Free all receive rings.
4266  *
4267  **********************************************************************/
4268 static void
4269 ixgbe_free_receive_structures(struct adapter *adapter)
4270 {
4271         struct rx_ring *rxr = adapter->rx_rings;
4272
4273         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4274
4275         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4276                 struct lro_ctrl         *lro = &rxr->lro;
4277                 ixgbe_free_receive_buffers(rxr);
4278                 /* Free LRO memory */
4279                 tcp_lro_free(lro);
4280                 /* Free the ring memory as well */
4281                 ixgbe_dma_free(adapter, &rxr->rxdma);
4282         }
4283
4284         free(adapter->rx_rings, M_DEVBUF);
4285 }
4286
4287
4288 /*********************************************************************
4289  *
4290  *  Free receive ring data structures
4291  *
4292  **********************************************************************/
4293 static void
4294 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4295 {
4296         struct adapter          *adapter = rxr->adapter;
4297         struct ixgbe_rx_buf     *rxbuf;
4298
4299         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4300
4301         /* Cleanup any existing buffers */
4302         if (rxr->rx_buffers != NULL) {
4303                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4304                         rxbuf = &rxr->rx_buffers[i];
4305                         if (rxbuf->buf != NULL) {
4306                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4307                                     BUS_DMASYNC_POSTREAD);
4308                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4309                                 rxbuf->buf->m_flags |= M_PKTHDR;
4310                                 m_freem(rxbuf->buf);
4311                         }
4312                         rxbuf->buf = NULL;
4313                         if (rxbuf->pmap != NULL) {
4314                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4315                                 rxbuf->pmap = NULL;
4316                         }
4317                 }
4318                 free(rxr->rx_buffers, M_DEVBUF);
4319                 rxr->rx_buffers = NULL;
4322         }
4323
4324         if (rxr->ptag != NULL) {
4325                 bus_dma_tag_destroy(rxr->ptag);
4326                 rxr->ptag = NULL;
4327         }
4328
4329         return;
4330 }
4331
4332 static __inline void
4333 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4334 {
4335
4336         /*
4337          * At the moment LRO is only for IP/TCP packets whose TCP checksum
4338          * was computed by hardware and which carry no VLAN tag in the
4339          * ethernet header.  For IPv6 we do not yet support ext. headers.
4340          */
4341         if (rxr->lro_enabled &&
4342             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4343             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4344             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4345             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4346             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4347             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4348             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4349             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4350                 /*
4351                  * Send to the stack if:
4352                  **  - LRO not enabled, or
4353                  **  - no LRO resources, or
4354                  **  - lro enqueue fails
4355                  */
4356                 if (rxr->lro.lro_cnt != 0)
4357                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4358                                 return;
4359         }
4360         IXGBE_RX_UNLOCK(rxr);
4361         (*ifp->if_input)(ifp, m);
4362         IXGBE_RX_LOCK(rxr);
4363 }
4364
4365 static __inline void
4366 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4367 {
4368         struct ixgbe_rx_buf     *rbuf;
4369
4370         rbuf = &rxr->rx_buffers[i];
4371
4372         if (rbuf->fmp != NULL) {/* Partial chain ? */
4373                 rbuf->fmp->m_flags |= M_PKTHDR;
4374                 m_freem(rbuf->fmp);
4375                 rbuf->fmp = NULL;
4376         }
4377
4378         /*
4379         ** With advanced descriptors the writeback
4380         ** clobbers the buffer addresses, so it's easier
4381         ** to just free the existing mbufs and take
4382         ** the normal refresh path to get new buffers
4383         ** and mapping.
4384         */
4385         if (rbuf->buf) {
4386                 m_free(rbuf->buf);
4387                 rbuf->buf = NULL;
4388         }
4389
4390         rbuf->flags = 0;
4391  
4392         return;
4393 }
4394
4395
4396 /*********************************************************************
4397  *
4398  *  This routine executes in interrupt context. It replenishes
4399  *  the mbufs in the descriptor ring and sends data which has been
4400  *  dma'ed into host memory to the upper layer.
4401  *
4402  *  We loop at most count times, where count is taken from the
4403  *  ring's process limit.
4404  *
4405  *  Return TRUE for more work, FALSE for all clean.
4406  *********************************************************************/
4407 static bool
4408 ixgbe_rxeof(struct ix_queue *que)
4409 {
4410         struct adapter          *adapter = que->adapter;
4411         struct rx_ring          *rxr = que->rxr;
4412         struct ifnet            *ifp = adapter->ifp;
4413         struct lro_ctrl         *lro = &rxr->lro;
4414         struct lro_entry        *queued;
4415         int                     i, nextp, processed = 0;
4416         u32                     staterr = 0;
4417         u16                     count = rxr->process_limit;
4418         union ixgbe_adv_rx_desc *cur;
4419         struct ixgbe_rx_buf     *rbuf, *nbuf;
4420
4421         IXGBE_RX_LOCK(rxr);
4422
4423 #ifdef DEV_NETMAP
4424         /* Same as the txeof routine: wakeup clients on intr. */
4425         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4426                 return (FALSE);
4427 #endif /* DEV_NETMAP */
4428
4429         for (i = rxr->next_to_check; count != 0;) {
4430                 struct mbuf     *sendmp, *mp;
4431                 u32             rsc, ptype;
4432                 u16             len;
4433                 u16             vtag = 0;
4434                 bool            eop;
4435  
4436                 /* Sync the ring. */
4437                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4438                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4439
4440                 cur = &rxr->rx_base[i];
4441                 staterr = le32toh(cur->wb.upper.status_error);
4442
4443                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4444                         break;
4445                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4446                         break;
4447
4448                 count--;
4449                 sendmp = NULL;
4450                 nbuf = NULL;
4451                 rsc = 0;
4452                 cur->wb.upper.status_error = 0;
4453                 rbuf = &rxr->rx_buffers[i];
4454                 mp = rbuf->buf;
4455
4456                 len = le16toh(cur->wb.upper.length);
4457                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4458                     IXGBE_RXDADV_PKTTYPE_MASK;
4459                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4460
4461                 /* Make sure bad packets are discarded */
4462                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4463                     (rxr->discard)) {
4464                         rxr->rx_discarded++;
4465                         if (eop)
4466                                 rxr->discard = FALSE;
4467                         else
4468                                 rxr->discard = TRUE;
4469                         ixgbe_rx_discard(rxr, i);
4470                         goto next_desc;
4471                 }
4472
4473                 /*
4474                 ** On the 82599, which supports a hardware
4475                 ** LRO (called HW RSC), packets need not be
4476                 ** fragmented across sequential descriptors;
4477                 ** instead the next descriptor is indicated in
4478                 ** bits of the current descriptor.  This also
4479                 ** means that we might process more than one
4480                 ** packet at a time, something that had never
4481                 ** been true before; it required eliminating
4482                 ** global chain pointers in favor of what we
4483                 ** are doing here.  -jfv
4484                 */
4485                 if (!eop) {
4486                         /*
4487                         ** Figure out the next descriptor
4488                         ** of this frame.
4489                         */
4490                         if (rxr->hw_rsc == TRUE) {
4491                                 rsc = ixgbe_rsc_count(cur);
4492                                 rxr->rsc_num += (rsc - 1);
4493                         }
4494                         if (rsc) { /* Get hardware index */
4495                                 nextp = ((staterr &
4496                                     IXGBE_RXDADV_NEXTP_MASK) >>
4497                                     IXGBE_RXDADV_NEXTP_SHIFT);
4498                         } else { /* Just sequential */
4499                                 nextp = i + 1;
4500                                 if (nextp == adapter->num_rx_desc)
4501                                         nextp = 0;
4502                         }
4503                         nbuf = &rxr->rx_buffers[nextp];
4504                         prefetch(nbuf);
4505                 }
4506                 /*
4507                 ** Rather than using the fmp/lmp global pointers
4508                 ** we now keep the head of a packet chain in the
4509                 ** buffer struct and pass this along from one
4510                 ** descriptor to the next, until we get EOP.
4511                 */
4512                 mp->m_len = len;
4513                 /*
4514                 ** See if there is a stored head that
4515                 ** this descriptor is a continuation of.
4516                 */
4517                 sendmp = rbuf->fmp;
4518                 if (sendmp != NULL) {  /* secondary frag */
4519                         rbuf->buf = rbuf->fmp = NULL;
4520                         mp->m_flags &= ~M_PKTHDR;
4521                         sendmp->m_pkthdr.len += mp->m_len;
4522                 } else {
4523                         /*
4524                          * Optimize.  This might be a small packet,
4525                          * maybe just a TCP ACK.  Do a fast copy that
4526                          * is cache aligned into a new mbuf, and
4527                          * leave the old mbuf+cluster for re-use.
4528                          */
4529                         if (eop && len <= IXGBE_RX_COPY_LEN) {
4530                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4531                                 if (sendmp != NULL) {
4532                                         sendmp->m_data +=
4533                                             IXGBE_RX_COPY_ALIGN;
4534                                         ixgbe_bcopy(mp->m_data,
4535                                             sendmp->m_data, len);
4536                                         sendmp->m_len = len;
4537                                         rxr->rx_copies++;
4538                                         rbuf->flags |= IXGBE_RX_COPY;
4539                                 }
4540                         }
4541                         if (sendmp == NULL) {
4542                                 rbuf->buf = rbuf->fmp = NULL;
4543                                 sendmp = mp;
4544                         }
4545
4546                         /* first desc of a non-ps chain */
4547                         sendmp->m_flags |= M_PKTHDR;
4548                         sendmp->m_pkthdr.len = mp->m_len;
4549                 }
4550                 ++processed;
4551
4552                 /* Pass the head pointer on */
4553                 if (eop == 0) {
4554                         nbuf->fmp = sendmp;
4555                         sendmp = NULL;
4556                         mp->m_next = nbuf->buf;
4557                 } else { /* Sending this frame */
4558                         sendmp->m_pkthdr.rcvif = ifp;
4559                         ifp->if_ipackets++;
4560                         rxr->rx_packets++;
4561                         /* capture data for AIM */
4562                         rxr->bytes += sendmp->m_pkthdr.len;
4563                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4564                         /* Process vlan info */
4565                         if ((rxr->vtag_strip) &&
4566                             (staterr & IXGBE_RXD_STAT_VP))
4567                                 vtag = le16toh(cur->wb.upper.vlan);
4568                         if (vtag) {
4569                                 sendmp->m_pkthdr.ether_vtag = vtag;
4570                                 sendmp->m_flags |= M_VLANTAG;
4571                         }
4572                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4573                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4574 #if __FreeBSD_version >= 800000
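                        /*
                        ** Tag the mbuf with this queue's MSIX vector
                        ** as its flow id so the stack can preserve
                        ** per-queue ordering.
                        */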
4575                         sendmp->m_pkthdr.flowid = que->msix;
4576                         sendmp->m_flags |= M_FLOWID;
4577 #endif
4578                 }
4579 next_desc:
4580                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4581                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4582
4583                 /* Advance our pointers to the next descriptor. */
4584                 if (++i == rxr->num_desc)
4585                         i = 0;
4586
4587                 /* Now send to the stack or do LRO */
4588                 if (sendmp != NULL) {
4589                         rxr->next_to_check = i;
4590                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4591                         i = rxr->next_to_check;
4592                 }
4593
4594                 /* Every 8 descriptors we go to refresh mbufs */
4595                 if (processed == 8) {
4596                         ixgbe_refresh_mbufs(rxr, i);
4597                         processed = 0;
4598                 }
4599         }
4600
4601         /* Refresh any remaining buf structs */
4602         if (ixgbe_rx_unrefreshed(rxr))
4603                 ixgbe_refresh_mbufs(rxr, i);
4604
4605         rxr->next_to_check = i;
4606
4607         /*
4608          * Flush any outstanding LRO work
4609          */
4610         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4611                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4612                 tcp_lro_flush(lro, queued);
4613         }
4614
4615         IXGBE_RX_UNLOCK(rxr);
4616
4617         /*
4618         ** Still have cleaning to do?
4619         */
4620         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
4621                 return (TRUE);
4622         else
4623                 return (FALSE);
4624 }
4625
4626
4627 /*********************************************************************
4628  *
4629  *  Verify that the hardware indicated that the checksum is valid.
4630  *  Inform the stack of the checksum status so that it
4631  *  doesn't spend time re-verifying the checksum.
4632  *
4633  *********************************************************************/
4634 static void
4635 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4636 {
4637         u16     status = (u16) staterr;
4638         u8      errors = (u8) (staterr >> 24);
4639         bool    sctp = FALSE;
4640
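        /*
        ** The advanced descriptor packs its status bits in the low
        ** word of staterr and the error flags in the top byte, which
        ** is why errors is taken from staterr >> 24.
        */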
4641         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4642             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4643                 sctp = TRUE;
4644
4645         if (status & IXGBE_RXD_STAT_IPCS) {
4646                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4647                         /* IP Checksum Good */
4648                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4649                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4650
4651                 } else
4652                         mp->m_pkthdr.csum_flags = 0;
4653         }
4654         if (status & IXGBE_RXD_STAT_L4CS) {
4655                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4656 #if __FreeBSD_version >= 800000
4657                 if (sctp)
4658                         type = CSUM_SCTP_VALID;
4659 #endif
4660                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4661                         mp->m_pkthdr.csum_flags |= type;
4662                         if (!sctp)
4663                                 mp->m_pkthdr.csum_data = htons(0xffff);
4664                 } 
4665         }
4666         return;
4667 }
4668
4669
4670 /*
4671 ** This routine is run via a vlan config EVENT;
4672 ** it enables us to use the HW Filter table since
4673 ** we can get the vlan id. This just creates the
4674 ** entry in the soft version of the VFTA; init will
4675 ** repopulate the real table.
4676 */
4677 static void
4678 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4679 {
4680         struct adapter  *adapter = ifp->if_softc;
4681         u16             index, bit;
4682
4683         if (ifp->if_softc !=  arg)   /* Not our event */
4684                 return;
4685
4686         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4687                 return;
4688
4689         IXGBE_CORE_LOCK(adapter);
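        /*
        ** The VFTA is 128 32-bit words covering VLAN IDs 0-4095:
        ** bits [11:5] of the tag select the word and bits [4:0] the
        ** bit within it; e.g. vtag 100 -> index 3, bit 4.
        */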
4690         index = (vtag >> 5) & 0x7F;
4691         bit = vtag & 0x1F;
4692         adapter->shadow_vfta[index] |= (1 << bit);
4693         ++adapter->num_vlans;
4694         ixgbe_init_locked(adapter);
4695         IXGBE_CORE_UNLOCK(adapter);
4696 }
4697
4698 /*
4699 ** This routine is run via a vlan
4700 ** unconfig EVENT; it removes our entry
4701 ** from the soft vfta.
4702 */
4703 static void
4704 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4705 {
4706         struct adapter  *adapter = ifp->if_softc;
4707         u16             index, bit;
4708
4709         if (ifp->if_softc !=  arg)
4710                 return;
4711
4712         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4713                 return;
4714
4715         IXGBE_CORE_LOCK(adapter);
4716         index = (vtag >> 5) & 0x7F;
4717         bit = vtag & 0x1F;
4718         adapter->shadow_vfta[index] &= ~(1 << bit);
4719         --adapter->num_vlans;
4720         /* Re-init to load the changes */
4721         ixgbe_init_locked(adapter);
4722         IXGBE_CORE_UNLOCK(adapter);
4723 }
4724
4725 static void
4726 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4727 {
4728         struct ifnet    *ifp = adapter->ifp;
4729         struct ixgbe_hw *hw = &adapter->hw;
4730         struct rx_ring  *rxr;
4731         u32             ctrl;
4732
4733
4734         /*
4735         ** We get here through init_locked, meaning
4736         ** a soft reset; this has already cleared
4737         ** the VFTA and other state, so if no
4738         ** vlans have been registered do nothing.
4739         */
4740         if (adapter->num_vlans == 0)
4741                 return;
4742
4743         /*
4744         ** A soft reset zeroes out the VFTA, so
4745         ** we need to repopulate it now.
4746         */
4747         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4748                 if (adapter->shadow_vfta[i] != 0)
4749                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4750                             adapter->shadow_vfta[i]);
4751
4752         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4753         /* Enable the Filter Table if enabled */
4754         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4755                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4756                 ctrl |= IXGBE_VLNCTRL_VFE;
4757         }
4758         if (hw->mac.type == ixgbe_mac_82598EB)
4759                 ctrl |= IXGBE_VLNCTRL_VME;
4760         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4761
4762         /* Setup the queues for vlans */
4763         for (int i = 0; i < adapter->num_queues; i++) {
4764                 rxr = &adapter->rx_rings[i];
4765                 /* On 82599 the VLAN enable is per/queue in RXDCTL */
4766                 if (hw->mac.type != ixgbe_mac_82598EB) {
4767                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4768                         ctrl |= IXGBE_RXDCTL_VME;
4769                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4770                 }
4771                 rxr->vtag_strip = TRUE;
4772         }
4773 }
4774
4775 static void
4776 ixgbe_enable_intr(struct adapter *adapter)
4777 {
4778         struct ixgbe_hw *hw = &adapter->hw;
4779         struct ix_queue *que = adapter->queues;
4780         u32             mask, fwsm;
4781
4782         mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4783         /* Enable Fan Failure detection */
4784         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4785                     mask |= IXGBE_EIMS_GPI_SDP1;
4786
4787         switch (adapter->hw.mac.type) {
4788                 case ixgbe_mac_82599EB:
4789                         mask |= IXGBE_EIMS_ECC;
4790                         mask |= IXGBE_EIMS_GPI_SDP0;
4791                         mask |= IXGBE_EIMS_GPI_SDP1;
4792                         mask |= IXGBE_EIMS_GPI_SDP2;
4793 #ifdef IXGBE_FDIR
4794                         mask |= IXGBE_EIMS_FLOW_DIR;
4795 #endif
4796                         break;
4797                 case ixgbe_mac_X540:
4798                         mask |= IXGBE_EIMS_ECC;
4799                         /* Detect if Thermal Sensor is enabled */
4800                         fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
4801                         if (fwsm & IXGBE_FWSM_TS_ENABLED)
4802                                 mask |= IXGBE_EIMS_TS;
4803 #ifdef IXGBE_FDIR
4804                         mask |= IXGBE_EIMS_FLOW_DIR;
4805 #endif
4806                 /* falls through */
4807                 default:
4808                         break;
4809         }
4810
4811         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4812
4813         /* With RSS we use auto clear */
4814         if (adapter->msix_mem) {
4815                 mask = IXGBE_EIMS_ENABLE_MASK;
4816                 /* Don't autoclear Link */
4817                 mask &= ~IXGBE_EIMS_OTHER;
4818                 mask &= ~IXGBE_EIMS_LSC;
4819                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4820         }
4821
4822         /*
4823         ** Now enable all queues; this is done separately to
4824         ** allow for handling the extended (beyond 32) MSIX
4825         ** vectors that can be used by the 82599.
4826         */
4827         for (int i = 0; i < adapter->num_queues; i++, que++)
4828                 ixgbe_enable_queue(adapter, que->msix);
4829
4830         IXGBE_WRITE_FLUSH(hw);
4831
4832         return;
4833 }
4834
4835 static void
4836 ixgbe_disable_intr(struct adapter *adapter)
4837 {
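        /*
        ** As in the enable path, newer MACs can use more than 32 MSIX
        ** vectors, so the two EIMC_EX registers must be masked as
        ** well; the 82598 has only the single EIMC.
        */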
4838         if (adapter->msix_mem)
4839                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4840         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4841                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4842         } else {
4843                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4844                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4845                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4846         }
4847         IXGBE_WRITE_FLUSH(&adapter->hw);
4848         return;
4849 }
4850
4851 u16
4852 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4853 {
4854         u16 value;
4855
4856         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4857             reg, 2);
4858
4859         return (value);
4860 }
4861
4862 void
4863 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4864 {
4865         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4866             reg, value, 2);
4867
4868         return;
4869 }
4870
4871 /*
4872 ** Get the width and transaction speed of
4873 ** the slot this adapter is plugged into.
4874 */
4875 static void
4876 ixgbe_get_slot_info(struct ixgbe_hw *hw)
4877 {
4878         device_t                dev = ((struct ixgbe_osdep *)hw->back)->dev;
4879         struct ixgbe_mac_info   *mac = &hw->mac;
4880         u16                     link;
4881         u32                     offset;
4882
4883         /* For most devices simply call the shared code routine */
4884         if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
4885                 ixgbe_get_bus_info(hw);
4886                 goto display;
4887         }
4888
4889         /*
4890         ** For the Quad port adapter we need to parse back
4891         ** up the PCI tree to find the speed of the expansion
4892         ** slot into which this adapter is plugged. A bit more work.
4893         */
4894         dev = device_get_parent(device_get_parent(dev));
4895 #ifdef IXGBE_DEBUG
4896         device_printf(dev, "parent pcib = %x,%x,%x\n",
4897             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4898 #endif
4899         dev = device_get_parent(device_get_parent(dev));
4900 #ifdef IXGBE_DEBUG
4901         device_printf(dev, "slot pcib = %x,%x,%x\n",
4902             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4903 #endif
4904         /* Now get the PCI Express Capabilities offset */
4905         pci_find_cap(dev, PCIY_EXPRESS, &offset);
4906         /* ...and read the Link Status Register */
4907         link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
4908         switch (link & IXGBE_PCI_LINK_WIDTH) {
4909         case IXGBE_PCI_LINK_WIDTH_1:
4910                 hw->bus.width = ixgbe_bus_width_pcie_x1;
4911                 break;
4912         case IXGBE_PCI_LINK_WIDTH_2:
4913                 hw->bus.width = ixgbe_bus_width_pcie_x2;
4914                 break;
4915         case IXGBE_PCI_LINK_WIDTH_4:
4916                 hw->bus.width = ixgbe_bus_width_pcie_x4;
4917                 break;
4918         case IXGBE_PCI_LINK_WIDTH_8:
4919                 hw->bus.width = ixgbe_bus_width_pcie_x8;
4920                 break;
4921         default:
4922                 hw->bus.width = ixgbe_bus_width_unknown;
4923                 break;
4924         }
4925
4926         switch (link & IXGBE_PCI_LINK_SPEED) {
4927         case IXGBE_PCI_LINK_SPEED_2500:
4928                 hw->bus.speed = ixgbe_bus_speed_2500;
4929                 break;
4930         case IXGBE_PCI_LINK_SPEED_5000:
4931                 hw->bus.speed = ixgbe_bus_speed_5000;
4932                 break;
4933         case IXGBE_PCI_LINK_SPEED_8000:
4934                 hw->bus.speed = ixgbe_bus_speed_8000;
4935                 break;
4936         default:
4937                 hw->bus.speed = ixgbe_bus_speed_unknown;
4938                 break;
4939         }
4940
4941         mac->ops.set_lan_id(hw);
4942
4943 display:
4944         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
4945             ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
4946             (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
4947             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
4948             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
4949             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
4950             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
4951             ("Unknown"));
4952
4953         if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4954             ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
4955             (hw->bus.speed == ixgbe_bus_speed_2500))) {
4956                 device_printf(dev, "PCI-Express bandwidth available"
4957                     " for this card\n     is not sufficient for"
4958                     " optimal performance.\n");
4959                 device_printf(dev, "For optimal performance a x8 "
4960                     "PCIE, or x4 PCIE Gen2 slot is required.\n");
4961         }
4962         if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4963             ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
4964             (hw->bus.speed < ixgbe_bus_speed_8000))) {
4965                 device_printf(dev, "PCI-Express bandwidth available"
4966                     " for this card\n     is not sufficient for"
4967                     " optimal performance.\n");
4968                 device_printf(dev, "For optimal performance a x8 "
4969                     "PCIE Gen3 slot is required.\n");
4970         }
4971
4972         return;
4973 }
4974
4975
4976 /*
4977 ** Setup the correct IVAR register for a particular MSIX interrupt
4978 **   (yes this is all very magic and confusing :)
4979 **  - entry is the register array entry
4980 **  - vector is the MSIX vector for this queue
4981 **  - type is RX/TX/MISC
4982 */
4983 static void
4984 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4985 {
4986         struct ixgbe_hw *hw = &adapter->hw;
4987         u32 ivar, index;
4988
4989         vector |= IXGBE_IVAR_ALLOC_VAL;
4990
4991         switch (hw->mac.type) {
4992
4993         case ixgbe_mac_82598EB:
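                /*
                ** Each 32-bit IVAR register holds four 8-bit entries:
                ** (entry >> 2) selects the register and (entry & 0x3)
                ** the byte within it; e.g. entry 5 lands in byte 1 of
                ** IVAR(1).
                */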
4994                 if (type == -1)
4995                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4996                 else
4997                         entry += (type * 64);
4998                 index = (entry >> 2) & 0x1F;
4999                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5000                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5001                 ivar |= (vector << (8 * (entry & 0x3)));
5002                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5003                 break;
5004
5005         case ixgbe_mac_82599EB:
5006         case ixgbe_mac_X540:
5007                 if (type == -1) { /* MISC IVAR */
5008                         index = (entry & 1) * 8;
5009                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5010                         ivar &= ~(0xFF << index);
5011                         ivar |= (vector << index);
5012                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5013                 } else {        /* RX/TX IVARS */
5014                         index = (16 * (entry & 1)) + (8 * type);
5015                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5016                         ivar &= ~(0xFF << index);
5017                         ivar |= (vector << index);
5018                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5019                 }
5020                 break;
5021         default:
5022                 break;
5023         }
5024 }
5025
5026 static void
5027 ixgbe_configure_ivars(struct adapter *adapter)
5028 {
5029         struct  ix_queue *que = adapter->queues;
5030         u32 newitr;
5031
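        /*
        ** The EITR interval field occupies register bits [11:3] in
        ** 2 usec units (see ixgbe_sysctl_interrupt_rate_handler), so
        ** (4000000 / rate) yields the pre-shifted value directly;
        ** e.g. capping at 8000 ints/sec gives 4000000/8000 = 500
        ** (0x1F4), masked to 0x1F0, i.e. an interval of 62 * 2 usec.
        */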
5032         if (ixgbe_max_interrupt_rate > 0)
5033                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5034         else
5035                 newitr = 0;
5036
5037         for (int i = 0; i < adapter->num_queues; i++, que++) {
5038                 /* First the RX queue entry */
5039                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5040                 /* ... and the TX */
5041                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5042                 /* Set an Initial EITR value */
5043                 IXGBE_WRITE_REG(&adapter->hw,
5044                     IXGBE_EITR(que->msix), newitr);
5045         }
5046
5047         /* For the Link interrupt */
5048         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5049 }
5050
5051 /*
5052 ** ixgbe_sfp_probe - called from the local timer to
5053 ** determine if a port has had optics inserted.
5054 */
5055 static bool
ixgbe_sfp_probe(struct adapter *adapter)
5056 {
5057         struct ixgbe_hw *hw = &adapter->hw;
5058         device_t        dev = adapter->dev;
5059         bool            result = FALSE;
5060
5061         if ((hw->phy.type == ixgbe_phy_nl) &&
5062             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5063                 s32 ret = hw->phy.ops.identify_sfp(hw);
5064                 if (ret)
5065                         goto out;
5066                 ret = hw->phy.ops.reset(hw);
5067                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5068                         device_printf(dev, "Unsupported SFP+ module detected!"
5069                             " Reload driver with supported module.\n");
5070                         adapter->sfp_probe = FALSE;
5071                         goto out;
5072                 } else
5073                         device_printf(dev,"SFP+ module detected!\n");
5074                 /* We now have supported optics */
5075                 adapter->sfp_probe = FALSE;
5076                 /* Set the optics type so system reports correctly */
5077                 ixgbe_setup_optics(adapter);
5078                 result = TRUE;
5079         }
5080 out:
5081         return (result);
5082 }
5083
5084 /*
5085 ** Tasklet handler for MSIX Link interrupts
5086 **  - run outside interrupt context since it might sleep
5087 */
5088 static void
5089 ixgbe_handle_link(void *context, int pending)
5090 {
5091         struct adapter  *adapter = context;
5092
5093         ixgbe_check_link(&adapter->hw,
5094             &adapter->link_speed, &adapter->link_up, 0);
5095         ixgbe_update_link_status(adapter);
5096 }
5097
5098 /*
5099 ** Tasklet for handling SFP module interrupts
5100 */
5101 static void
5102 ixgbe_handle_mod(void *context, int pending)
5103 {
5104         struct adapter  *adapter = context;
5105         struct ixgbe_hw *hw = &adapter->hw;
5106         device_t        dev = adapter->dev;
5107         u32 err;
5108
5109         err = hw->phy.ops.identify_sfp(hw);
5110         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5111                 device_printf(dev,
5112                     "Unsupported SFP+ module type was detected.\n");
5113                 return;
5114         }
5115         err = hw->mac.ops.setup_sfp(hw);
5116         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5117                 device_printf(dev,
5118                     "Setup failure - unsupported SFP+ module type.\n");
5119                 return;
5120         }
5121         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5122         return;
5123 }
5124
5125
5126 /*
5127 ** Tasklet for handling MSF (multispeed fiber) interrupts
5128 */
5129 static void
5130 ixgbe_handle_msf(void *context, int pending)
5131 {
5132         struct adapter  *adapter = context;
5133         struct ixgbe_hw *hw = &adapter->hw;
5134         u32 autoneg;
5135         bool negotiate;
5136
5137         autoneg = hw->phy.autoneg_advertised;
5138         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5139                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5140         if (hw->mac.ops.setup_link)
5141                 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5142         return;
5143 }
5144
5145 #ifdef IXGBE_FDIR
5146 /*
5147 ** Tasklet for reinitializing the Flow Director filter table
5148 */
5149 static void
5150 ixgbe_reinit_fdir(void *context, int pending)
5151 {
5152         struct adapter  *adapter = context;
5153         struct ifnet   *ifp = adapter->ifp;
5154
5155         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5156                 return;
5157         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5158         adapter->fdir_reinit = 0;
5159         /* re-enable flow director interrupts */
5160         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5161         /* Restart the interface */
5162         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5163         return;
5164 }
5165 #endif
5166
5167 /**********************************************************************
5168  *
5169  *  Update the board statistics counters.
5170  *
5171  **********************************************************************/
5172 static void
5173 ixgbe_update_stats_counters(struct adapter *adapter)
5174 {
5175         struct ifnet   *ifp = adapter->ifp;
5176         struct ixgbe_hw *hw = &adapter->hw;
5177         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5178         u64  total_missed_rx = 0;
5179
5180         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5181         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5182         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5183         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5184
5185         /*
5186         ** Note: these are for the 8 possible traffic classes,
5187         **       which are unused in the current implementation,
5188         **       so only index 0 should report real data.
5189         */
5190         for (int i = 0; i < 8; i++) {
5191                 u32 mp;
5192                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5193                 /* missed_rx tallies misses for the gprc workaround */
5194                 missed_rx += mp;
5195                 /* global total per queue */
5196                 adapter->stats.mpc[i] += mp;
5197                 /* Running comprehensive total for stats display */
5198                 total_missed_rx += adapter->stats.mpc[i];
5199                 if (hw->mac.type == ixgbe_mac_82598EB) {
5200                         adapter->stats.rnbc[i] +=
5201                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5202                         adapter->stats.qbtc[i] +=
5203                             IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5204                         adapter->stats.qbrc[i] +=
5205                             IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5206                         adapter->stats.pxonrxc[i] +=
5207                             IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5208                 } else
5209                         adapter->stats.pxonrxc[i] +=
5210                             IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5211                 adapter->stats.pxontxc[i] +=
5212                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5213                 adapter->stats.pxofftxc[i] +=
5214                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5215                 adapter->stats.pxoffrxc[i] +=
5216                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5217                 adapter->stats.pxon2offc[i] +=
5218                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5219         }
5220         for (int i = 0; i < 16; i++) {
5221                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5222                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5223                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5224         }
5225         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5226         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5227         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5228
5229         /* Hardware workaround, gprc counts missed packets */
5230         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5231         adapter->stats.gprc -= missed_rx;
5232
5233         if (hw->mac.type != ixgbe_mac_82598EB) {
5234                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5235                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5236                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5237                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5238                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5239                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5240                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5241                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5242         } else {
5243                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5244                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5245                 /* 82598 only has a counter in the high register */
5246                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5247                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5248                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5249         }
5250
5251         /*
5252          * Workaround: mprc hardware is incorrectly counting
5253          * broadcasts, so for now we subtract those.
5254          */
5255         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5256         adapter->stats.bprc += bprc;
5257         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5258         if (hw->mac.type == ixgbe_mac_82598EB)
5259                 adapter->stats.mprc -= bprc;
5260
5261         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5262         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5263         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5264         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5265         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5266         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5267
5268         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5269         adapter->stats.lxontxc += lxon;
5270         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5271         adapter->stats.lxofftxc += lxoff;
5272         total = lxon + lxoff;
5273
5274         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5275         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5276         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5277         adapter->stats.gptc -= total;
5278         adapter->stats.mptc -= total;
5279         adapter->stats.ptc64 -= total;
5280         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5281
5282         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5283         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5284         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5285         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5286         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5287         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5288         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5289         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5290         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5291         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5292         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5293         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5294         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5295         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5296         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5297         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5298         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5299         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5300         /* Only read FCoE counters on newer (non-82598) MACs */
5301         if (hw->mac.type != ixgbe_mac_82598EB) {
5302                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5303                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5304                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5305                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5306                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5307         }
5308
5309         /* Fill out the OS statistics structure */
5310         ifp->if_ipackets = adapter->stats.gprc;
5311         ifp->if_opackets = adapter->stats.gptc;
5312         ifp->if_ibytes = adapter->stats.gorc;
5313         ifp->if_obytes = adapter->stats.gotc;
5314         ifp->if_imcasts = adapter->stats.mprc;
5315         ifp->if_omcasts = adapter->stats.mptc;
5316         ifp->if_collisions = 0;
5317
5318         /* Rx Errors */
5319         ifp->if_iqdrops = total_missed_rx;
5320         ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
5321 }
5322
5323 /** ixgbe_sysctl_tdh_handler - Handler function
5324  *  Retrieves the TDH value from the hardware
5325  */
5326 static int 
5327 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5328 {
5329         int error;
5330
5331         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5332         if (!txr) return 0;
5333
5334         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5335         error = sysctl_handle_int(oidp, &val, 0, req);
5336         if (error || !req->newptr)
5337                 return error;
5338         return 0;
5339 }
5340
5341 /** ixgbe_sysctl_tdt_handler - Handler function
5342  *  Retrieves the TDT value from the hardware
5343  */
5344 static int 
5345 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5346 {
5347         int error;
5348
5349         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5350         if (!txr) return 0;
5351
5352         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5353         error = sysctl_handle_int(oidp, &val, 0, req);
5354         if (error || !req->newptr)
5355                 return error;
5356         return 0;
5357 }
5358
5359 /** ixgbe_sysctl_rdh_handler - Handler function
5360  *  Retrieves the RDH value from the hardware
5361  */
5362 static int 
5363 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5364 {
5365         int error;
5366
5367         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5368         if (!rxr) return 0;
5369
5370         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5371         error = sysctl_handle_int(oidp, &val, 0, req);
5372         if (error || !req->newptr)
5373                 return error;
5374         return 0;
5375 }
5376
5377 /** ixgbe_sysctl_rdt_handler - Handler function
5378  *  Retrieves the RDT value from the hardware
5379  */
5380 static int 
5381 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5382 {
5383         int error;
5384
5385         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5386         if (!rxr) return 0;
5387
5388         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5389         error = sysctl_handle_int(oidp, &val, 0, req);
5390         if (error || !req->newptr)
5391                 return error;
5392         return 0;
5393 }
5394
5395 static int
5396 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5397 {
5398         int error;
5399         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5400         unsigned int reg, usec, rate;
5401
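        /*
        ** EITR bits [11:3] hold the interrupt interval in 2 usec
        ** units, hence rate = 500000 / usec; e.g. a field value of
        ** 125 reports 4000 interrupts/sec.
        */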
5402         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5403         usec = ((reg & 0x0FF8) >> 3);
5404         if (usec > 0)
5405                 rate = 500000 / usec;
5406         else
5407                 rate = 0;
5408         error = sysctl_handle_int(oidp, &rate, 0, req);
5409         if (error || !req->newptr)
5410                 return error;
5411         reg &= ~0xfff; /* default, no limitation */
5412         ixgbe_max_interrupt_rate = 0;
5413         if (rate > 0 && rate < 500000) {
5414                 if (rate < 1000)
5415                         rate = 1000;
5416                 ixgbe_max_interrupt_rate = rate;
5417                 reg |= ((4000000/rate) & 0xff8 );
5418         }
5419         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5420         return 0;
5421 }
5422
5423 /*
5424  * Add sysctl variables, one per statistic, to the system.
5425  */
5426 static void
5427 ixgbe_add_hw_stats(struct adapter *adapter)
5428 {
5429
5430         device_t dev = adapter->dev;
5431
5432         struct tx_ring *txr = adapter->tx_rings;
5433         struct rx_ring *rxr = adapter->rx_rings;
5434
5435         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5436         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5437         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5438         struct ixgbe_hw_stats *stats = &adapter->stats;
5439
5440         struct sysctl_oid *stat_node, *queue_node;
5441         struct sysctl_oid_list *stat_list, *queue_list;
5442
5443 #define QUEUE_NAME_LEN 32
5444         char namebuf[QUEUE_NAME_LEN];
5445
5446         /* Driver Statistics */
5447         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5448                         CTLFLAG_RD, &adapter->dropped_pkts,
5449                         "Driver dropped packets");
5450         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5451                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5452                         "m_defrag() failed");
5453         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5454                         CTLFLAG_RD, &adapter->watchdog_events,
5455                         "Watchdog timeouts");
5456         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5457                         CTLFLAG_RD, &adapter->link_irq,
5458                         "Link MSIX IRQ Handled");
5459
5460         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5461                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5462                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5463                                             CTLFLAG_RD, NULL, "Queue Name");
5464                 queue_list = SYSCTL_CHILDREN(queue_node);
5465
5466                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5467                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5468                                 sizeof(&adapter->queues[i]),
5469                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5470                                 "Interrupt Rate");
5471                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5472                                 CTLFLAG_RD, &(adapter->queues[i].irqs),
5473                                 "irqs on this queue");
5474                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5475                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5476                                 ixgbe_sysctl_tdh_handler, "IU",
5477                                 "Transmit Descriptor Head");
5478                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5479                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5480                                 ixgbe_sysctl_tdt_handler, "IU",
5481                                 "Transmit Descriptor Tail");
5482                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
5483                                 CTLFLAG_RD, &txr->tso_tx,
5484                                 "TSO");
5485                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
5486                                 CTLFLAG_RD, &txr->no_tx_dma_setup,
5487                                 "Driver tx dma failure in xmit");
5488                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5489                                 CTLFLAG_RD, &txr->no_desc_avail,
5490                                 "Queue No Descriptor Available");
5491                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5492                                 CTLFLAG_RD, &txr->total_packets,
5493                                 "Queue Packets Transmitted");
5494         }
5495
5496         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5497                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5498                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5499                                             CTLFLAG_RD, NULL, "Queue Name");
5500                 queue_list = SYSCTL_CHILDREN(queue_node);
5501
5502                 struct lro_ctrl *lro = &rxr->lro;
5508
5509                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5510                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5511                                 ixgbe_sysctl_rdh_handler, "IU",
5512                                 "Receive Descriptor Head");
5513                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5514                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5515                                 ixgbe_sysctl_rdt_handler, "IU",
5516                                 "Receive Descriptor Tail");
5517                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5518                                 CTLFLAG_RD, &rxr->rx_packets,
5519                                 "Queue Packets Received");
5520                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5521                                 CTLFLAG_RD, &rxr->rx_bytes,
5522                                 "Queue Bytes Received");
5523                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
5524                                 CTLFLAG_RD, &rxr->rx_copies,
5525                                 "Copied RX Frames");
5526                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5527                                 CTLFLAG_RD, &lro->lro_queued, 0,
5528                                 "LRO Queued");
5529                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5530                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5531                                 "LRO Flushed");
5532         }
5533
5534         /* MAC stats get their own sub node */
5535
5536         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5537                                     CTLFLAG_RD, NULL, "MAC Statistics");
5538         stat_list = SYSCTL_CHILDREN(stat_node);
5539
5540         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5541                         CTLFLAG_RD, &stats->crcerrs,
5542                         "CRC Errors");
5543         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5544                         CTLFLAG_RD, &stats->illerrc,
5545                         "Illegal Byte Errors");
5546         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5547                         CTLFLAG_RD, &stats->errbc,
5548                         "Byte Errors");
5549         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5550                         CTLFLAG_RD, &stats->mspdc,
5551                         "MAC Short Packets Discarded");
5552         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5553                         CTLFLAG_RD, &stats->mlfc,
5554                         "MAC Local Faults");
5555         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5556                         CTLFLAG_RD, &stats->mrfc,
5557                         "MAC Remote Faults");
5558         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5559                         CTLFLAG_RD, &stats->rlec,
5560                         "Receive Length Errors");
5561
5562         /* Flow Control stats */
5563         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5564                         CTLFLAG_RD, &stats->lxontxc,
5565                         "Link XON Transmitted");
5566         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5567                         CTLFLAG_RD, &stats->lxonrxc,
5568                         "Link XON Received");
5569         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5570                         CTLFLAG_RD, &stats->lxofftxc,
5571                         "Link XOFF Transmitted");
5572         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5573                         CTLFLAG_RD, &stats->lxoffrxc,
5574                         "Link XOFF Received");
5575
5576         /* Packet Reception Stats */
5577         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5578                         CTLFLAG_RD, &stats->tor, 
5579                         "Total Octets Received"); 
5580         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5581                         CTLFLAG_RD, &stats->gorc, 
5582                         "Good Octets Received"); 
5583         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5584                         CTLFLAG_RD, &stats->tpr,
5585                         "Total Packets Received");
5586         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5587                         CTLFLAG_RD, &stats->gprc,
5588                         "Good Packets Received");
5589         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5590                         CTLFLAG_RD, &stats->mprc,
5591                         "Multicast Packets Received");
5592         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5593                         CTLFLAG_RD, &stats->bprc,
5594                         "Broadcast Packets Received");
5595         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5596                         CTLFLAG_RD, &stats->prc64,
5597                         "64 byte frames received");
5598         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5599                         CTLFLAG_RD, &stats->prc127,
5600                         "65-127 byte frames received");
5601         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5602                         CTLFLAG_RD, &stats->prc255,
5603                         "128-255 byte frames received");
5604         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5605                         CTLFLAG_RD, &stats->prc511,
5606                         "256-511 byte frames received");
5607         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5608                         CTLFLAG_RD, &stats->prc1023,
5609                         "512-1023 byte frames received");
5610         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5611                         CTLFLAG_RD, &stats->prc1522,
5612                         "1024-1522 byte frames received");
5613         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5614                         CTLFLAG_RD, &stats->ruc,
5615                         "Receive Undersized");
5616         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5617                         CTLFLAG_RD, &stats->rfc,
5618                         "Fragmented Packets Received");
5619         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5620                         CTLFLAG_RD, &stats->roc,
5621                         "Oversized Packets Received");
5622         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5623                         CTLFLAG_RD, &stats->rjc,
5624                         "Received Jabber");
5625         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5626                         CTLFLAG_RD, &stats->mngprc,
5627                         "Management Packets Received");
5628         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5629                         CTLFLAG_RD, &stats->mngpdc,
5630                         "Management Packets Dropped");
5631         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5632                         CTLFLAG_RD, &stats->xec,
5633                         "Checksum Errors");
5634
5635         /* Packet Transmission Stats */
5636         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5637                         CTLFLAG_RD, &stats->gotc, 
5638                         "Good Octets Transmitted"); 
5639         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5640                         CTLFLAG_RD, &stats->tpt,
5641                         "Total Packets Transmitted");
5642         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5643                         CTLFLAG_RD, &stats->gptc,
5644                         "Good Packets Transmitted");
5645         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5646                         CTLFLAG_RD, &stats->bptc,
5647                         "Broadcast Packets Transmitted");
5648         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5649                         CTLFLAG_RD, &stats->mptc,
5650                         "Multicast Packets Transmitted");
5651         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5652                         CTLFLAG_RD, &stats->mngptc,
5653                         "Management Packets Transmitted");
5654         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5655                         CTLFLAG_RD, &stats->ptc64,
5656                         "64 byte frames transmitted");
5657         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5658                         CTLFLAG_RD, &stats->ptc127,
5659                         "65-127 byte frames transmitted");
5660         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5661                         CTLFLAG_RD, &stats->ptc255,
5662                         "128-255 byte frames transmitted");
5663         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5664                         CTLFLAG_RD, &stats->ptc511,
5665                         "256-511 byte frames transmitted");
5666         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5667                         CTLFLAG_RD, &stats->ptc1023,
5668                         "512-1023 byte frames transmitted");
5669         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5670                         CTLFLAG_RD, &stats->ptc1522,
5671                         "1024-1522 byte frames transmitted");
5672 }
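/*
** Illustrative usage (not part of the driver): once the nodes above are
** registered, the per-queue and MAC statistics can be read from userland
** with sysctl(8). Assuming the device attached as ix0, the paths would
** look like:
**
**      sysctl dev.ix.0.queue0.tx_packets
**      sysctl dev.ix.0.queue0.rx_packets
**      sysctl dev.ix.0.mac_stats.crc_errs
**
** or "sysctl dev.ix.0" to dump the entire subtree.
*/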
5673
5674 /*
5675 ** Set flow control using sysctl:
5676 ** Flow control values:
5677 **      0 - off
5678 **      1 - rx pause
5679 **      2 - tx pause
5680 **      3 - full
5681 */
5682 static int
5683 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5684 {
5685         int error, last;
5686         struct adapter *adapter = (struct adapter *) arg1;
5687
5688         last = adapter->fc;
5689         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5690         if ((error) || (req->newptr == NULL))
5691                 return (error);
5692
5693         /* Don't bother if it's not changed */
5694         if (adapter->fc == last)
5695                 return (0);
5696
5697         switch (adapter->fc) {
5698                 case ixgbe_fc_rx_pause:
5699                 case ixgbe_fc_tx_pause:
5700                 case ixgbe_fc_full:
5701                         adapter->hw.fc.requested_mode = adapter->fc;
5702                         if (adapter->num_queues > 1)
5703                                 ixgbe_disable_rx_drop(adapter);
5704                         break;
5705                 case ixgbe_fc_none:
5706                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5707                         if (adapter->num_queues > 1)
5708                                 ixgbe_enable_rx_drop(adapter);
5709                         break;
5710                 default:
5711                         adapter->fc = last;
5712                         return (EINVAL);
5713         }
5714         /* Don't autoneg if forcing a value */
5715         adapter->hw.fc.disable_fc_autoneg = TRUE;
5716         ixgbe_fc_enable(&adapter->hw);
5717         return (error);
5718 }
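/*
** Illustrative usage (assumption: this handler is attached to a
** read/write node named "fc" under the device's sysctl tree at attach
** time). Forcing full flow control on unit 0:
**
**      sysctl dev.ix.0.fc=3
**
** Values outside 0-3 are rejected with EINVAL and the previous setting
** is restored.
*/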
5719
5720 /*
5721 ** Control link advertise speed:
5722 **      1 - advertise only 1G
5723 **      2 - advertise 100Mb (X540 only)
5724 **      3 - advertise normal (1G and 10G)
5725 */
5726 static int
5727 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5728 {
5729         int                     error = 0;
5730         struct adapter          *adapter;
5731         device_t                dev;
5732         struct ixgbe_hw         *hw;
5733         ixgbe_link_speed        speed, last;
5734
5735         adapter = (struct adapter *) arg1;
5736         dev = adapter->dev;
5737         hw = &adapter->hw;
5738         last = adapter->advertise;
5739
5740         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5741         if ((error) || (req->newptr == NULL))
5742                 return (error);
5743
5744         if (adapter->advertise == last) /* no change */
5745                 return (0);
5746
5747         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5748             (hw->phy.multispeed_fiber)))
5749                 return (EINVAL);
5750
5751         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5752                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5753                 return (EINVAL);
5754         }
5755
5756         if (adapter->advertise == 1)
5757                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5758         else if (adapter->advertise == 2)
5759                 speed = IXGBE_LINK_SPEED_100_FULL;
5760         else if (adapter->advertise == 3)
5761                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5762                         IXGBE_LINK_SPEED_10GB_FULL;
5763         else {  /* bogus value */
5764                 adapter->advertise = last;
5765                 return (EINVAL);
5766         }
5767
5768         hw->mac.autotry_restart = TRUE;
5769         hw->mac.ops.setup_link(hw, speed, TRUE);
5770
5771         return (error);
5772 }
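/*
** Illustrative usage (assumption: this handler backs a node named
** "advertise_speed"). Limiting an X540 copper port to 100Mb:
**
**      sysctl dev.ix.0.advertise_speed=2
**
** The request is refused with EINVAL on media that is neither copper
** nor multispeed fiber, and 100Mb is accepted on the X540 only.
*/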
5773
5774 /*
5775 ** Thermal Shutdown Trigger
5776 **   - cause a Thermal Overtemp IRQ
5777 **   - this now requires firmware enabling
5778 */
5779 static int
5780 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5781 {
5782         int             error, fire = 0;
5783         struct adapter  *adapter = (struct adapter *) arg1;
5784         struct ixgbe_hw *hw = &adapter->hw;
5785
5787         if (hw->mac.type != ixgbe_mac_X540)
5788                 return (0);
5789
5790         error = sysctl_handle_int(oidp, &fire, 0, req);
5791         if ((error) || (req->newptr == NULL))
5792                 return (error);
5793
5794         if (fire) {
5795                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5796                 reg |= IXGBE_EICR_TS;
5797                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5798         }
5799
5800         return (0);
5801 }
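/*
** Illustrative usage (the sysctl node name below is hypothetical;
** check where this handler is registered for the real one). On an
** X540, writing any non-zero value fires the overtemp interrupt:
**
**      sysctl dev.ix.0.thermal_test=1
**
** On other MAC types the write is silently ignored.
*/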
5802
5803 /*
5804 ** Enable the hardware to drop packets when the buffer is
5805 ** full. This is useful with multiqueue, so that no single
5806 ** queue being full stalls the entire RX engine. We only
5807 ** enable this when multiqueue is in use AND when Flow
5808 ** Control is disabled.
5809 */
5810 static void
5811 ixgbe_enable_rx_drop(struct adapter *adapter)
5812 {
5813         struct ixgbe_hw *hw = &adapter->hw;
5814
5815         for (int i = 0; i < adapter->num_queues; i++) {
5816                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5817                 srrctl |= IXGBE_SRRCTL_DROP_EN;
5818                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5819         }
5820 }
5821
5822 static void
5823 ixgbe_disable_rx_drop(struct adapter *adapter)
5824 {
5825         struct ixgbe_hw *hw = &adapter->hw;
5826
5827         for (int i = 0; i < adapter->num_queues; i++) {
5828                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5829                 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
5830                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5831         }
5832 }
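/*
** Note (illustrative): these two helpers implement the policy chosen in
** ixgbe_set_flowcntl() above. With multiple queues and flow control off,
** SRRCTL.DROP_EN is set on every queue so a full queue drops frames
** rather than stalling the whole RX engine; with flow control on,
** dropping is turned back off so pause frames can do their job. A
** minimal sketch of that decision, using only fields seen above:
**
**      if (adapter->num_queues > 1 && adapter->fc == ixgbe_fc_none)
**              ixgbe_enable_rx_drop(adapter);
**      else
**              ixgbe_disable_rx_drop(adapter);
*/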