/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "ixgbe.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char ixgbe_driver_version[] = "2.5.15";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static char    *ixgbe_strings[] = {
        "Intel(R) PRO/10GbE PCI-Express Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t);
static int      ixgbe_attach(device_t);
static int      ixgbe_detach(device_t);
static int      ixgbe_shutdown(device_t);
#ifdef IXGBE_LEGACY_TX
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#else /* ! IXGBE_LEGACY_TX */
static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int      ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     ixgbe_qflush(struct ifnet *);
static void     ixgbe_deferred_mq_start(void *, int);
#endif /* IXGBE_LEGACY_TX */
static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
static void     ixgbe_init(void *);
static void     ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *);
static void     ixgbe_get_slot_info(struct ixgbe_hw *);
static int      ixgbe_allocate_msix(struct adapter *);
static int      ixgbe_allocate_legacy(struct adapter *);
static int      ixgbe_allocate_queues(struct adapter *);
static int      ixgbe_setup_msix(struct adapter *);
static void     ixgbe_free_pci_resources(struct adapter *);
static void     ixgbe_local_timer(void *);
static int      ixgbe_setup_interface(device_t, struct adapter *);
static void     ixgbe_config_link(struct adapter *);

static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int      ixgbe_setup_transmit_structures(struct adapter *);
static void     ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int      ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
static void     ixgbe_setup_hw_rsc(struct rx_ring *);

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void     ixgbe_txeof(struct tx_ring *);
static bool     ixgbe_rxeof(struct ix_queue *);
static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
                    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static int      ixgbe_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      ixgbe_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void     ixgbe_configure_ivars(struct adapter *);
static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void     ixgbe_setup_vlan_hw_support(struct adapter *);
static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                    struct mbuf *, u32);

static void     ixgbe_enable_rx_drop(struct adapter *);
static void     ixgbe_disable_rx_drop(struct adapter *);

/* Support for pluggable optic modules */
static bool     ixgbe_sfp_probe(struct adapter *);
static void     ixgbe_setup_optics(struct adapter *);
/* Legacy (single vector) interrupt handler */
static void     ixgbe_legacy_irq(void *);

/* The MSI/X Interrupt handlers */
static void     ixgbe_msix_que(void *);
static void     ixgbe_msix_link(void *);

/* Deferred interrupt tasklets */
static void     ixgbe_handle_que(void *, int);
static void     ixgbe_handle_link(void *, int);
static void     ixgbe_handle_msf(void *, int);
static void     ixgbe_handle_mod(void *, int);

#ifdef IXGBE_FDIR
static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
static void     ixgbe_reinit_fdir(void *, int);
#endif

/* Missing shared code prototype */
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ixgbe_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, ixgbe_probe),
        DEVMETHOD(device_attach, ixgbe_attach),
        DEVMETHOD(device_detach, ixgbe_detach),
        DEVMETHOD(device_shutdown, ixgbe_shutdown),
        DEVMETHOD_END
};

static driver_t ixgbe_driver = {
        "ix", ixgbe_methods, sizeof(struct adapter),
};

devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);

/*
** TUNEABLE PARAMETERS:
*/

static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
                   "IXGBE driver parameters");
/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector.
*/
static int ixgbe_enable_aim = TRUE;
TUNABLE_INT("hw.ix.enable_aim", &ixgbe_enable_aim);
SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RW, &ixgbe_enable_aim, 0,
    "Enable adaptive interrupt moderation");

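/*
 * Example (illustrative values only, not recommendations): these
 * knobs are loader tunables, so a hypothetical /boot/loader.conf
 * could contain
 *
 *   hw.ix.enable_aim="0"
 *   hw.ix.num_queues="4"
 *
 * and the CTLFLAG_RW ones, such as hw.ix.enable_aim, may also be
 * flipped at runtime with sysctl(8).
 */
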
static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
TUNABLE_INT("hw.ix.max_interrupt_rate", &ixgbe_max_interrupt_rate);
SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");

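/*
 * Assuming IXGBE_LOW_LATENCY is 128 (its value in ixgbe.h at the
 * time of writing), the default above works out to
 * 4000000 / 128 = 31250 interrupts per second per vector.
 */
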
/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
TUNABLE_INT("hw.ix.rx_process_limit", &ixgbe_rx_process_limit);
SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &ixgbe_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, "
    "-1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
TUNABLE_INT("hw.ix.tx_process_limit", &ixgbe_tx_process_limit);
SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &ixgbe_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, "
    "-1 means unlimited");

/*
** Smart speed setting, default to on.
** This only works as a compile option
** right now, as it is set during attach;
** set this to 'ixgbe_smart_speed_off' to
** disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
TUNABLE_INT("hw.ix.enable_msix", &ixgbe_enable_msix);
SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
 * Number of queues: this can be set to 0,
 * in which case it autoconfigures based on
 * the number of CPUs, with a maximum of 8.
 * It can also be overridden manually here.
 */
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ix.num_queues", &ixgbe_num_queues);
SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Number of TX descriptors per ring;
** set higher than RX as this seems
** the better performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
TUNABLE_INT("hw.ix.txd", &ixgbe_txd);
SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
    "Number of transmit descriptors per queue");

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
TUNABLE_INT("hw.ix.rxd", &ixgbe_rxd);
SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
    "Number of receive descriptors per queue");

/*
** Defining this on will allow the use
** of unsupported SFP+ modules; note that
** if you do so, you are on your own :)
*/
static int allow_unsupported_sfp = FALSE;
TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  It will also cause IP forwarding to
**  fail, and that can't be controlled by
**  the stack the way LRO can. For all these
**  reasons I've deemed it best to leave
**  this off and not bother with a tuneable
**  interface; enabling it requires a
**  recompile.
*/
static bool ixgbe_rsc_enable = FALSE;

/* Keep a running tab of the total ports for sanity checks */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** sampling rate of TX packets
** entering the filter pool; at the
** default, every 20th packet is probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool; this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif

#ifdef DEV_NETMAP
/*
 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
 * be a reference on how to implement netmap support in a driver.
 * Additional comments are in ixgbe_netmap.h .
 *
 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
 * that extend the standard driver.
 */
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines if the driver should be loaded on the
 *  adapter based on the PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
ixgbe_probe(device_t dev)
{
        ixgbe_vendor_info_t *ent;

        u16     pci_vendor_id = 0;
        u16     pci_device_id = 0;
        u16     pci_subvendor_id = 0;
        u16     pci_subdevice_id = 0;
        char    adapter_name[256];

        INIT_DEBUGOUT("ixgbe_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = ixgbe_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                     (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                     (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                ixgbe_strings[ent->index],
                                ixgbe_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        ++ixgbe_total_ports;
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_attach(device_t dev)
{
        struct adapter *adapter;
        struct ixgbe_hw *hw;
        int             error = 0;
        u16             csum;
        u32             ctrl_ext;

        INIT_DEBUGOUT("ixgbe_attach: begin");

        /* Allocate, clear, and link in our adapter structure */
        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;

        /* Core Lock Init */
        IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL APIs */

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "enable_aim", CTLFLAG_RW,
                        &ixgbe_enable_aim, 1, "Interrupt Moderation");

        /*
        ** Allow a kind of speed control by forcing the autoneg
        ** advertised speed list to only a certain value; this
        ** supports 1G on 82599 devices and 100Mb on X540.
        */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_advertise, "I", "Link Speed");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
                        0, ixgbe_set_thermal_test, "I", "Thermal Test");

        /* Set up the timer callout */
        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware revision */
        ixgbe_identify_hardware(adapter);

        /* Do base PCI setup - map BAR0 */
        if (ixgbe_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_out;
        }

        /* Do descriptor calc and sanity checks */
        if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
                device_printf(dev, "TXD config issue, using default!\n");
                adapter->num_tx_desc = DEFAULT_TXD;
        } else
                adapter->num_tx_desc = ixgbe_txd;

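        /*
         * Illustrative arithmetic for the check above (assuming an
         * advanced TX descriptor is 16 bytes and DBA_ALIGN is 128):
         * 2048 descriptors occupy 2048 * 16 = 32768 bytes, a multiple
         * of 128, so 2048 is accepted; a request of 1001 descriptors
         * (16016 bytes) is not aligned and falls back to DEFAULT_TXD.
         */
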
        /*
        ** With many RX rings it is easy to exceed the
        ** system mbuf allocation. Tuning nmbclusters
        ** can alleviate this.
        */
        if (nmbclusters > 0) {
                int s;
                s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
                if (s > nmbclusters) {
                        device_printf(dev, "RX Descriptors exceed "
                            "system mbuf max, using default instead!\n");
                        ixgbe_rxd = DEFAULT_RXD;
                }
        }
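
        /*
         * Example of the budget above (illustrative numbers): two
         * ports, each with 8 queues of 2048 RX descriptors, want
         * 2048 * 8 * 2 = 32768 clusters; if kern.ipc.nmbclusters is
         * below that, the descriptor count is dropped to DEFAULT_RXD.
         */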

        if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
                device_printf(dev, "RXD config issue, using default!\n");
                adapter->num_rx_desc = DEFAULT_RXD;
        } else
                adapter->num_rx_desc = ixgbe_rxd;

        /* Allocate our TX/RX Queues */
        if (ixgbe_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_out;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Cannot allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Initialize the shared code */
        hw->allow_unsupported_sfp = allow_unsupported_sfp;
        error = ixgbe_init_shared_code(hw);
        if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
                /*
                ** No optics in this port; set up
                ** so the timer routine will probe
                ** for later insertion.
                */
                adapter->sfp_probe = TRUE;
                error = 0;
        } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                device_printf(dev, "Unsupported SFP+ module detected!\n");
                error = EIO;
                goto err_late;
        } else if (error) {
                device_printf(dev, "Unable to initialize the shared code\n");
                error = EIO;
                goto err_late;
        }

        /* Make sure we have a good EEPROM before we read from it */
        if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
                device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
                error = EIO;
                goto err_late;
        }

        error = ixgbe_init_hw(hw);
        switch (error) {
        case IXGBE_ERR_EEPROM_VERSION:
                device_printf(dev, "This device is a pre-production adapter/"
                    "LOM.  Please be aware there may be issues associated "
                    "with your hardware.\n If you are experiencing problems "
                    "please contact your Intel or hardware representative "
                    "who provided you with this hardware.\n");
                break;
        case IXGBE_ERR_SFP_NOT_SUPPORTED:
                device_printf(dev, "Unsupported SFP+ Module\n");
                error = EIO;
                goto err_late;
        case IXGBE_ERR_SFP_NOT_PRESENT:
                device_printf(dev, "No SFP+ Module found\n");
                /* falls thru */
        default:
                break;
        }

        /* Detect and set physical type */
        ixgbe_setup_optics(adapter);

        if ((adapter->msix > 1) && (ixgbe_enable_msix))
                error = ixgbe_allocate_msix(adapter);
        else
                error = ixgbe_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        if (ixgbe_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Initialize statistics */
        ixgbe_update_stats_counters(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /*
        ** Check PCIE slot type/speed/width
        */
        ixgbe_get_slot_info(hw);

        /* Set an initial default flow control value */
        adapter->fc = ixgbe_fc_full;

        /* let hardware know driver is loaded */
        ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
        ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

        ixgbe_add_hw_stats(adapter);

#ifdef DEV_NETMAP
        ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("ixgbe_attach: end");
        return (0);
err_late:
        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
err_out:
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        ixgbe_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_detach(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ix_queue *que = adapter->queues;
        struct tx_ring *txr = adapter->tx_rings;
        u32     ctrl_ext;

        INIT_DEBUGOUT("ixgbe_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);

        for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
                if (que->tq) {
#ifndef IXGBE_LEGACY_TX
                        taskqueue_drain(que->tq, &txr->txq_task);
#endif
                        taskqueue_drain(que->tq, &que->que_task);
                        taskqueue_free(que->tq);
                }
        }

        /* Drain the Link queue */
        if (adapter->tq) {
                taskqueue_drain(adapter->tq, &adapter->link_task);
                taskqueue_drain(adapter->tq, &adapter->mod_task);
                taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
                taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
                taskqueue_free(adapter->tq);
        }

        /* let hardware know driver is unloading */
        ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
        ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);
#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        ixgbe_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(adapter->ifp);

        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
        free(adapter->mta, M_DEVBUF);

        IXGBE_CORE_LOCK_DESTROY(adapter);
        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
ixgbe_shutdown(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return (0);
}

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct mbuf    *m_head;
        struct adapter *adapter = txr->adapter;

        IXGBE_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                return;
        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
                        break;

                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;

                if (ixgbe_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status = IXGBE_QUEUE_WORKING;
        }
        return;
}

/*
 * Legacy TX start - called by the stack; this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IXGBE_TX_LOCK(txr);
                ixgbe_start_locked(txr, ifp);
                IXGBE_TX_UNLOCK(txr);
        }
        return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ix_queue *que;
        struct tx_ring  *txr;
        int             i, err = 0;

        /* Which queue to use */
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IXGBE_TX_TRYLOCK(txr)) {
                ixgbe_mq_start_locked(ifp, txr);
                IXGBE_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}
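
/*
 * Note on the queue choice above: when the stack has computed a
 * flow hash (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE), the flowid
 * keeps every packet of a flow on one ring; e.g. with 8 queues a
 * flowid of 35 selects ring 35 % 8 = 3 (illustrative numbers only).
 * Otherwise we fall back to the sending CPU's index.
 */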

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             enqueued = 0, err = 0;

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
#if __FreeBSD_version < 901504
        next = drbr_dequeue(ifp, txr->br);
        while (next != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
#else
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                drbr_advance(ifp, txr->br);
                        } else {
                                drbr_putback(ifp, txr->br, next);
                        }
#endif
                        break;
                }
#if __FreeBSD_version >= 901504
                drbr_advance(ifp, txr->br);
#endif
                enqueued++;
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
#if __FreeBSD_version < 901504
                next = drbr_dequeue(ifp, txr->br);
#endif
        }

        if (enqueued > 0) {
                /* Set watchdog on */
                txr->queue_status = IXGBE_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
                ixgbe_txeof(txr);

        return (err);
}
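
/*
 * On the version split above: on FreeBSD >= 901504 the
 * drbr_peek()/drbr_advance()/drbr_putback() API leaves the mbuf in
 * the buf_ring until the transmit actually succeeds, whereas the
 * older dequeue/enqueue pair had to re-queue a failed mbuf at the
 * tail, which could reorder packets within a flow.
 */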

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
ixgbe_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IXGBE_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                ixgbe_mq_start_locked(ifp, txr);
        IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IXGBE_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IXGBE_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ixgbe_hw *hw = &adapter->hw;
        struct ifreq    *ifr = (struct ifreq *) data;
#if defined(INET) || defined(INET6)
        struct ifaddr *ifa = (struct ifaddr *)data;
        bool            avoid_reset = FALSE;
#endif
        int             error = 0;

        switch (command) {

        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
#if defined(INET) || defined(INET6)
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                ixgbe_init(adapter);
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
                        error = ether_ioctl(ifp, command, data);
#endif
                break;
        case SIOCSIFMTU:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
                if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
                        error = EINVAL;
                } else {
                        IXGBE_CORE_LOCK(adapter);
                        ifp->if_mtu = ifr->ifr_mtu;
                        adapter->max_frame_size =
                                ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
                IXGBE_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        ixgbe_set_promisc(adapter);
                                }
                        } else
                                ixgbe_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                ixgbe_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IXGBE_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_disable_intr(adapter);
                        ixgbe_set_multi(adapter);
                        ixgbe_enable_intr(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
        {
                int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
                if (mask & IFCAP_HWCSUM)
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                if (mask & IFCAP_TSO4)
                        ifp->if_capenable ^= IFCAP_TSO4;
                if (mask & IFCAP_TSO6)
                        ifp->if_capenable ^= IFCAP_TSO6;
                if (mask & IFCAP_LRO)
                        ifp->if_capenable ^= IFCAP_LRO;
                if (mask & IFCAP_VLAN_HWTAGGING)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                if (mask & IFCAP_VLAN_HWFILTER)
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                if (mask & IFCAP_VLAN_HWTSO)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                VLAN_CAPABILITIES(ifp);
                break;
        }
        case SIOCGI2C:
        {
                struct ixgbe_i2c_req    i2c;
                IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
                error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
                if (error)
                        break;
                /* Only the two standard SFP addresses are valid */
                if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
                        error = EINVAL;
                        break;
                }
                hw->phy.ops.read_i2c_byte(hw, i2c.offset,
                    i2c.dev_addr, i2c.data);
                error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
                break;
        }
        default:
                IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
#define IXGBE_MHADD_MFS_SHIFT 16

static void
ixgbe_init_locked(struct adapter *adapter)
{
        struct ifnet   *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        struct ixgbe_hw *hw = &adapter->hw;
        u32             k, txdctl, mhadd, gpie;
        u32             rxdctl, rxctrl;

        mtx_assert(&adapter->core_mtx, MA_OWNED);
        INIT_DEBUGOUT("ixgbe_init_locked: begin");
        hw->adapter_stopped = FALSE;
        ixgbe_stop_adapter(hw);
        callout_stop(&adapter->timer);

        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
              IXGBE_ETH_LENGTH_OF_ADDRESS);
        ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
        hw->addr_ctrl.rar_used_count = 1;

        /* Set the various hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (hw->mac.type != ixgbe_mac_82598EB)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        /* Prepare transmit descriptors and buffers */
        if (ixgbe_setup_transmit_structures(adapter)) {
                device_printf(dev, "Could not setup transmit structures\n");
                ixgbe_stop(adapter);
                return;
        }

        ixgbe_init_hw(hw);
        ixgbe_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        ixgbe_set_multi(adapter);

        /*
        ** Determine the correct mbuf pool
        ** for doing jumbo frames
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else if (adapter->max_frame_size <= 9216)
                adapter->rx_mbuf_sz = MJUM9BYTES;
        else
                adapter->rx_mbuf_sz = MJUM16BYTES;

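        /*
         * Worked example for the sizing above: a 9000-byte MTU gives
         * max_frame_size = 9000 + ETHER_HDR_LEN + ETHER_CRC_LEN = 9018,
         * which is <= 9216 and therefore selects the MJUM9BYTES (9k)
         * cluster pool.
         */
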
        /* Prepare receive descriptors and buffers */
        if (ixgbe_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                ixgbe_stop(adapter);
                return;
        }

        /* Configure RX settings */
        ixgbe_initialize_receive_units(adapter);

        gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

        /* Enable Fan Failure Interrupt */
        gpie |= IXGBE_SDP1_GPIEN;

        /* Add for Module detection */
        if (hw->mac.type == ixgbe_mac_82599EB)
                gpie |= IXGBE_SDP2_GPIEN;

        /* Thermal Failure Detection */
        if (hw->mac.type == ixgbe_mac_X540)
                gpie |= IXGBE_SDP0_GPIEN;

        if (adapter->msix > 1) {
                /* Enable Enhanced MSIX mode */
                gpie |= IXGBE_GPIE_MSIX_MODE;
                gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
                    IXGBE_GPIE_OCD;
        }
        IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

        /* Set MTU size */
        if (ifp->if_mtu > ETHERMTU) {
                mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
                mhadd &= ~IXGBE_MHADD_MFS_MASK;
                mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
                IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
        }

        /* Now enable all the queues */

        for (int i = 0; i < adapter->num_queues; i++) {
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                /* Set WTHRESH to 8, burst writeback */
                txdctl |= (8 << 16);
                /*
                 * When the internal queue falls below PTHRESH (32),
                 * start prefetching as long as there are at least
                 * HTHRESH (1) buffers ready. The values are taken
                 * from the Intel linux driver 3.8.21.
                 * Prefetching enables tx line rate even with 1 queue.
                 */
                txdctl |= (32 << 0) | (1 << 8);
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
        }
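
        /*
         * TXDCTL field layout for reference (per the 82599/X540
         * datasheets): PTHRESH occupies bits 6:0, HTHRESH bits 14:8
         * and WTHRESH bits 22:16, hence the shifts by 0, 8 and 16
         * above.
         */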

        for (int i = 0; i < adapter->num_queues; i++) {
                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
                if (hw->mac.type == ixgbe_mac_82598EB) {
                        /*
                        ** PTHRESH = 21
                        ** HTHRESH = 4
                        ** WTHRESH = 8
                        */
                        rxdctl &= ~0x3FFFFF;
                        rxdctl |= 0x080420;
                }
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
                for (k = 0; k < 10; k++) {
                        if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
                            IXGBE_RXDCTL_ENABLE)
                                break;
                        else
                                msec_delay(1);
                }
                wmb();
#ifdef DEV_NETMAP
                /*
                 * In netmap mode, we must preserve the buffers made
                 * available to userspace before the if_init()
                 * (this is true by default on the TX side, because
                 * init makes all buffers available to userspace).
                 *
                 * netmap_reset() and the device specific routines
                 * (e.g. ixgbe_setup_receive_rings()) map these
                 * buffers at the end of the NIC ring, so here we
                 * must set the RDT (tail) register to make sure
                 * they are not overwritten.
                 *
                 * In this driver the NIC ring starts at RDH = 0,
                 * RDT points to the last slot available for reception (?),
                 * so RDT = num_rx_desc - 1 means the whole ring is available.
                 */
                if (ifp->if_capenable & IFCAP_NETMAP) {
                        struct netmap_adapter *na = NA(adapter->ifp);
                        struct netmap_kring *kring = &na->rx_rings[i];
                        int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);

                        IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
                } else
#endif /* DEV_NETMAP */
                IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
        }

        /* Enable Receive engine */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
        if (hw->mac.type == ixgbe_mac_82598EB)
                rxctrl |= IXGBE_RXCTRL_DMBYPS;
        rxctrl |= IXGBE_RXCTRL_RXEN;
        ixgbe_enable_rx_dma(hw, rxctrl);

        callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

        /* Set up MSI/X routing */
        if (ixgbe_enable_msix) {
                ixgbe_configure_ivars(adapter);
                /* Set up auto-mask */
                if (hw->mac.type == ixgbe_mac_82598EB)
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
                else {
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
                }
        } else {  /* Simple settings for Legacy/MSI */
                ixgbe_set_ivar(adapter, 0, 0, 0);
                ixgbe_set_ivar(adapter, 0, 0, 1);
                IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
        }

#ifdef IXGBE_FDIR
        /* Init Flow director */
        if (hw->mac.type != ixgbe_mac_82598EB) {
                u32 hdrm = 32 << fdir_pballoc;

                hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
                ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
        }
#endif

        /*
        ** Check on any SFP devices that
        ** need to be kick-started
        */
        if (hw->phy.type == ixgbe_phy_none) {
                int err = hw->phy.ops.identify(hw);
                if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                        device_printf(dev,
                            "Unsupported SFP+ module type was detected.\n");
                        return;
                }
        }

        /* Set moderation on the Link interrupt */
        IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);

        /* Config/Enable Link */
        ixgbe_config_link(adapter);

        /* Hardware Packet Buffer & Flow Control setup */
        {
                u32 rxpb, frame, size, tmp;

                frame = adapter->max_frame_size;

                /* Calculate High Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_DV_X540(frame, frame);
                else
                        tmp = IXGBE_DV(frame, frame);
                size = IXGBE_BT2KB(tmp);
                rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
                hw->fc.high_water[0] = rxpb - size;

                /* Now calculate Low Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_LOW_DV_X540(frame);
                else
                        tmp = IXGBE_LOW_DV(frame);
                hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

                hw->fc.requested_mode = adapter->fc;
                hw->fc.pause_time = IXGBE_FC_PAUSE;
                hw->fc.send_xon = TRUE;
        }
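
        /*
         * Sketch of the logic above: the RXPBSIZE read >> 10 yields
         * the packet buffer size in KB; the DV macros compute a
         * worst-case delay allowance for one max-sized frame, which
         * IXGBE_BT2KB rounds up to KB.  The high water mark is thus
         * the packet buffer size minus that allowance (exact values
         * depend on the MAC type and frame size).
         */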
        /* Initialize the FC settings */
        ixgbe_start_hw(hw);

        /* Set up VLAN support and filter */
        ixgbe_setup_vlan_hw_support(adapter);

        /* And now turn on interrupts */
        ixgbe_enable_intr(adapter);

        /* Now inform the stack we're ready */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;

        return;
}

static void
ixgbe_init(void *arg)
{
        struct adapter *adapter = arg;

        IXGBE_CORE_LOCK(adapter);
        ixgbe_init_locked(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return;
}

/*
** MSIX Interrupt Handlers and Tasklets
*/

static inline void
ixgbe_enable_queue(struct adapter *adapter, u32 vector)
{
        struct ixgbe_hw *hw = &adapter->hw;
        u64     queue = ((u64)1 << vector);
        u32     mask;

        if (hw->mac.type == ixgbe_mac_82598EB) {
                mask = (IXGBE_EIMS_RTX_QUEUE & queue);
                IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
        } else {
                mask = (queue & 0xFFFFFFFF);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
                mask = (queue >> 32);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
        }
}
1388
1389 static inline void
1390 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1391 {
1392         struct ixgbe_hw *hw = &adapter->hw;
1393         u64     queue = ((u64)1 << vector);
1394         u32     mask;
1395
1396         if (hw->mac.type == ixgbe_mac_82598EB) {
1397                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1398                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1399         } else {
1400                 mask = (queue & 0xFFFFFFFF);
1401                 if (mask)
1402                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1403                 mask = (queue >> 32);
1404                 if (mask)
1405                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1406         }
1407 }
1408
1409 static void
1410 ixgbe_handle_que(void *context, int pending)
1411 {
1412         struct ix_queue *que = context;
1413         struct adapter  *adapter = que->adapter;
1414         struct tx_ring  *txr = que->txr;
1415         struct ifnet    *ifp = adapter->ifp;
1416         bool            more;
1417
1418         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1419                 more = ixgbe_rxeof(que);
1420                 IXGBE_TX_LOCK(txr);
1421                 ixgbe_txeof(txr);
1422 #ifndef IXGBE_LEGACY_TX
1423                 if (!drbr_empty(ifp, txr->br))
1424                         ixgbe_mq_start_locked(ifp, txr);
1425 #else
1426                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1427                         ixgbe_start_locked(txr, ifp);
1428 #endif
1429                 IXGBE_TX_UNLOCK(txr);
1430         }
1431
1432         /* Reenable this interrupt */
1433         if (que->res != NULL)
1434                 ixgbe_enable_queue(adapter, que->msix);
1435         else
1436                 ixgbe_enable_intr(adapter);
1437         return;
1438 }
1439
1440
1441 /*********************************************************************
1442  *
1443  *  Legacy Interrupt Service routine
1444  *
1445  **********************************************************************/
1446
1447 static void
1448 ixgbe_legacy_irq(void *arg)
1449 {
1450         struct ix_queue *que = arg;
1451         struct adapter  *adapter = que->adapter;
1452         struct ixgbe_hw *hw = &adapter->hw;
1453         struct ifnet    *ifp = adapter->ifp;
1454         struct          tx_ring *txr = adapter->tx_rings;
1455         bool            more;
1456         u32             reg_eicr;
1457
1458
1459         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1460
1461         ++que->irqs;
1462         if (reg_eicr == 0) {
1463                 ixgbe_enable_intr(adapter);
1464                 return;
1465         }
1466
1467         more = ixgbe_rxeof(que);
1468
1469         IXGBE_TX_LOCK(txr);
1470         ixgbe_txeof(txr);
1471 #ifdef IXGBE_LEGACY_TX
1472         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1473                 ixgbe_start_locked(txr, ifp);
1474 #else
1475         if (!drbr_empty(ifp, txr->br))
1476                 ixgbe_mq_start_locked(ifp, txr);
1477 #endif
1478         IXGBE_TX_UNLOCK(txr);
1479
1480         /* Check for fan failure */
1481         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1482             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1483                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1484                     "REPLACE IMMEDIATELY!!\n");
1485                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1486         }
1487
1488         /* Link status change */
1489         if (reg_eicr & IXGBE_EICR_LSC)
1490                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1491
1492         if (more)
1493                 taskqueue_enqueue(que->tq, &que->que_task);
1494         else
1495                 ixgbe_enable_intr(adapter);
1496         return;
1497 }
1498
1499
1500 /*********************************************************************
1501  *
1502  *  MSIX Queue Interrupt Service routine
1503  *
1504  **********************************************************************/
1505 void
1506 ixgbe_msix_que(void *arg)
1507 {
1508         struct ix_queue *que = arg;
1509         struct adapter  *adapter = que->adapter;
1510         struct ifnet    *ifp = adapter->ifp;
1511         struct tx_ring  *txr = que->txr;
1512         struct rx_ring  *rxr = que->rxr;
1513         bool            more;
1514         u32             newitr = 0;
1515
1516         /* Protect against spurious interrupts */
1517         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1518                 return;
1519
1520         ixgbe_disable_queue(adapter, que->msix);
1521         ++que->irqs;
1522
1523         more = ixgbe_rxeof(que);
1524
1525         IXGBE_TX_LOCK(txr);
1526         ixgbe_txeof(txr);
1527 #ifdef IXGBE_LEGACY_TX
1528                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1529                 ixgbe_start_locked(txr, ifp);
1530 #else
1531         if (!drbr_empty(ifp, txr->br))
1532                 ixgbe_mq_start_locked(ifp, txr);
1533 #endif
1534         IXGBE_TX_UNLOCK(txr);
1535
1536         /* Do AIM now? */
1537
1538         if (ixgbe_enable_aim == FALSE)
1539                 goto no_calc;
1540         /*
1541         ** Do Adaptive Interrupt Moderation:
1542         **  - Write out last calculated setting
1543         **  - Calculate based on average size over
1544         **    the last interval.
1545         */
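             /*
             ** A worked example, assuming bulk traffic: an average of
             ** 1500 bytes/packet becomes newitr = 1524 after the +24
             ** header/CRC adjustment below; that falls outside the
             ** 300-1200 mid-range band, so the divide-by-two leaves
             ** roughly 762 to be written to EITR.
             */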
1546         if (que->eitr_setting)
1547                 IXGBE_WRITE_REG(&adapter->hw,
1548                     IXGBE_EITR(que->msix), que->eitr_setting);
1549  
1550         que->eitr_setting = 0;
1551
1552         /* Idle, do nothing */
1553         if ((txr->bytes == 0) && (rxr->bytes == 0))
1554                 goto no_calc;
1555                                 
1556         if ((txr->bytes) && (txr->packets))
1557                 newitr = txr->bytes/txr->packets;
1558         if ((rxr->bytes) && (rxr->packets))
1559                 newitr = max(newitr,
1560                     (rxr->bytes / rxr->packets));
1561         newitr += 24; /* account for hardware frame, crc */
1562
1563         /* set an upper boundary */
1564         newitr = min(newitr, 3000);
1565
1566         /* Be nice to the mid range */
1567         if ((newitr > 300) && (newitr < 1200))
1568                 newitr = (newitr / 3);
1569         else
1570                 newitr = (newitr / 2);
1571
1572         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1573                 newitr |= newitr << 16;
1574         else
1575                 newitr |= IXGBE_EITR_CNT_WDIS;
1576                  
1577         /* save for next interrupt */
1578         que->eitr_setting = newitr;
1579
1580         /* Reset state */
1581         txr->bytes = 0;
1582         txr->packets = 0;
1583         rxr->bytes = 0;
1584         rxr->packets = 0;
1585
1586 no_calc:
1587         if (more)
1588                 taskqueue_enqueue(que->tq, &que->que_task);
1589         else
1590                 ixgbe_enable_queue(adapter, que->msix);
1591         return;
1592 }
1593
1594
1595 static void
1596 ixgbe_msix_link(void *arg)
1597 {
1598         struct adapter  *adapter = arg;
1599         struct ixgbe_hw *hw = &adapter->hw;
1600         u32             reg_eicr;
1601
1602         ++adapter->link_irq;
1603
1604         /* First get the cause */
1605         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1606         /* Be sure the queue bits are not cleared */
1607         reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1608         /* Clear interrupt with write */
1609         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1610
1611         /* Link status change */
1612         if (reg_eicr & IXGBE_EICR_LSC)
1613                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1614
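             /*
             ** On the newer MACs the SDP1/SDP2 general purpose pins
             ** report optics events: SDP1 kicks the multispeed fiber
             ** (msf) task and SDP2 the module insertion (mod) task
             ** handled below.
             */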
1615         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1616 #ifdef IXGBE_FDIR
1617                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1618                         /* This is probably overkill :) */
1619                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1620                                 return;
1621                         /* Disable the interrupt */
1622                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1623                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1624                 } else
1625 #endif
1626                 if (reg_eicr & IXGBE_EICR_ECC) {
1627                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1628                             "Please Reboot!!\n");
1629                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1630                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1633                         /* Clear the interrupt */
1634                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1635                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1636                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1637                         /* Clear the interrupt */
1638                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1639                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1640                 }
1641         } 
1642
1643         /* Check for fan failure */
1644         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1645             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1646                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1647                     "REPLACE IMMEDIATELY!!\n");
1648                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1649         }
1650
1651         /* Check for over temp condition */
1652         if ((hw->mac.type == ixgbe_mac_X540) &&
1653             (reg_eicr & IXGBE_EICR_TS)) {
1654                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1655                     "PHY IS SHUT DOWN!!\n");
1656                 device_printf(adapter->dev, "System shutdown required\n");
1657                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1658         }
1659
1660         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1661         return;
1662 }
1663
1664 /*********************************************************************
1665  *
1666  *  Media Ioctl callback
1667  *
1668  *  This routine is called whenever the user queries the status of
1669  *  the interface using ifconfig.
1670  *
1671  **********************************************************************/
1672 static void
1673 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1674 {
1675         struct adapter *adapter = ifp->if_softc;
1676
1677         INIT_DEBUGOUT("ixgbe_media_status: begin");
1678         IXGBE_CORE_LOCK(adapter);
1679         ixgbe_update_link_status(adapter);
1680
1681         ifmr->ifm_status = IFM_AVALID;
1682         ifmr->ifm_active = IFM_ETHER;
1683
1684         if (!adapter->link_active) {
1685                 IXGBE_CORE_UNLOCK(adapter);
1686                 return;
1687         }
1688
1689         ifmr->ifm_status |= IFM_ACTIVE;
1690
1691         switch (adapter->link_speed) {
1692                 case IXGBE_LINK_SPEED_100_FULL:
1693                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1694                         break;
1695                 case IXGBE_LINK_SPEED_1GB_FULL:
1696                         ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1697                         break;
1698                 case IXGBE_LINK_SPEED_10GB_FULL:
1699                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1700                         break;
1701         }
1702
1703         IXGBE_CORE_UNLOCK(adapter);
1704
1705         return;
1706 }
1707
1708 /*********************************************************************
1709  *
1710  *  Media Ioctl callback
1711  *
1712  *  This routine is called when the user changes speed/duplex using
1713  *  media/mediaopt option with ifconfig.
1714  *
1715  **********************************************************************/
1716 static int
1717 ixgbe_media_change(struct ifnet * ifp)
1718 {
1719         struct adapter *adapter = ifp->if_softc;
1720         struct ifmedia *ifm = &adapter->media;
1721
1722         INIT_DEBUGOUT("ixgbe_media_change: begin");
1723
1724         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1725                 return (EINVAL);
1726
1727         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1728         case IFM_AUTO:
1729                 adapter->hw.phy.autoneg_advertised =
1730                     IXGBE_LINK_SPEED_100_FULL |
1731                     IXGBE_LINK_SPEED_1GB_FULL |
1732                     IXGBE_LINK_SPEED_10GB_FULL;
1733                 break;
1734         default:
1735                 device_printf(adapter->dev, "Only auto media type\n");
1736                 return (EINVAL);
1737         }
1738
1739         return (0);
1740 }
1741
1742 /*********************************************************************
1743  *
1744  *  This routine maps the mbufs to tx descriptors, allowing the
1745  *  TX engine to transmit the packets. 
1746  *      - return 0 on success, positive on failure
1747  *
1748  **********************************************************************/
1749
1750 static int
1751 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1752 {
1753         struct adapter  *adapter = txr->adapter;
1754         u32             olinfo_status = 0, cmd_type_len;
1755         int             i, j, error, nsegs;
1756         int             first;
1757         bool            remap = TRUE;
1758         struct mbuf     *m_head;
1759         bus_dma_segment_t segs[adapter->num_segs];
1760         bus_dmamap_t    map;
1761         struct ixgbe_tx_buf *txbuf;
1762         union ixgbe_adv_tx_desc *txd = NULL;
1763
1764         m_head = *m_headp;
1765
1766         /* Basic descriptor defines */
1767         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1768             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1769
1770         if (m_head->m_flags & M_VLANTAG)
1771                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1772
1773         /*
1774          * Important to capture the first descriptor
1775          * used because it will contain the index of
1776          * the one we tell the hardware to report back
1777          */
1778         first = txr->next_avail_desc;
1779         txbuf = &txr->tx_buffers[first];
1780         map = txbuf->map;
1781
1782         /*
1783          * Map the packet for DMA.
1784          */
1785 retry:
1786         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1787             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1788
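             /*
             ** EFBIG from the load above means the mbuf chain has more
             ** segments than the DMA tag allows; a single m_defrag()
             ** pass is attempted before giving up.
             */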
1789         if (__predict_false(error)) {
1790                 struct mbuf *m;
1791
1792                 switch (error) {
1793                 case EFBIG:
1794                         /* Try it again? - one try */
1795                         if (remap == TRUE) {
1796                                 remap = FALSE;
1797                                 m = m_defrag(*m_headp, M_NOWAIT);
1798                                 if (m == NULL) {
1799                                         adapter->mbuf_defrag_failed++;
1800                                         m_freem(*m_headp);
1801                                         *m_headp = NULL;
1802                                         return (ENOBUFS);
1803                                 }
1804                                 *m_headp = m;
1805                                 goto retry;
1806                         } else
1807                                 return (error);
1808                 case ENOMEM:
1809                         txr->no_tx_dma_setup++;
1810                         return (error);
1811                 default:
1812                         txr->no_tx_dma_setup++;
1813                         m_freem(*m_headp);
1814                         *m_headp = NULL;
1815                         return (error);
1816                 }
1817         }
1818
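             /*
             ** Keep two descriptors in reserve: ixgbe_tx_ctx_setup()
             ** below may consume one for an offload context, and the
             ** second is assumed here to be slack so the ring never
             ** fills completely.
             */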
1819         /* Make certain there are enough descriptors */
1820         if (nsegs > txr->tx_avail - 2) {
1821                 txr->no_desc_avail++;
1822                 bus_dmamap_unload(txr->txtag, map);
1823                 return (ENOBUFS);
1824         }
1825         m_head = *m_headp;
1826
1827         /*
1828         ** Set up the appropriate offload context
1829         ** this will consume the first descriptor
1830         */
1831         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1832         if (__predict_false(error)) {
1833                 if (error == ENOBUFS)
1834                         *m_headp = NULL;
1835                 return (error);
1836         }
1837
1838 #ifdef IXGBE_FDIR
1839         /* Do the flow director magic */
1840         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1841                 ++txr->atr_count;
1842                 if (txr->atr_count >= atr_sample_rate) {
1843                         ixgbe_atr(txr, m_head);
1844                         txr->atr_count = 0;
1845                 }
1846         }
1847 #endif
1848
1849         i = txr->next_avail_desc;
1850         for (j = 0; j < nsegs; j++) {
1851                 bus_size_t seglen;
1852                 bus_addr_t segaddr;
1853
1854                 txbuf = &txr->tx_buffers[i];
1855                 txd = &txr->tx_base[i];
1856                 seglen = segs[j].ds_len;
1857                 segaddr = htole64(segs[j].ds_addr);
1858
1859                 txd->read.buffer_addr = segaddr;
1860                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1861             cmd_type_len | seglen);
1862                 txd->read.olinfo_status = htole32(olinfo_status);
1863
1864                 if (++i == txr->num_desc)
1865                         i = 0;
1866         }
1867
1868         txd->read.cmd_type_len |=
1869             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1870         txr->tx_avail -= nsegs;
1871         txr->next_avail_desc = i;
1872
1873         txbuf->m_head = m_head;
1874         /*
1875         ** Here we swap the map so the last descriptor,
1876         ** which gets the completion interrupt has the
1877         ** real map, and the first descriptor gets the
1878         ** unused map from this descriptor.
1879         */
1880         txr->tx_buffers[first].map = txbuf->map;
1881         txbuf->map = map;
1882         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1883
1884         /* Set the EOP descriptor that will be marked done */
1885         txbuf = &txr->tx_buffers[first];
1886         txbuf->eop = txd;
1887
1888         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1889             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1890         /*
1891          * Advance the Transmit Descriptor Tail (TDT); this tells the
1892          * hardware that this frame is available to transmit.
1893          */
1894         ++txr->total_packets;
1895         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1896
1897         return (0);
1898
1899 }
1900
1901 static void
1902 ixgbe_set_promisc(struct adapter *adapter)
1903 {
1904         u_int32_t       reg_rctl;
1905         struct ifnet   *ifp = adapter->ifp;
1906         int             mcnt = 0;
1907
1908         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1909         reg_rctl &= (~IXGBE_FCTRL_UPE);
1910         if (ifp->if_flags & IFF_ALLMULTI)
1911                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1912         else {
1913                 struct  ifmultiaddr *ifma;
1914 #if __FreeBSD_version < 800000
1915                 IF_ADDR_LOCK(ifp);
1916 #else
1917                 if_maddr_rlock(ifp);
1918 #endif
1919                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1920                         if (ifma->ifma_addr->sa_family != AF_LINK)
1921                                 continue;
1922                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1923                                 break;
1924                         mcnt++;
1925                 }
1926 #if __FreeBSD_version < 800000
1927                 IF_ADDR_UNLOCK(ifp);
1928 #else
1929                 if_maddr_runlock(ifp);
1930 #endif
1931         }
1932         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1933                 reg_rctl &= (~IXGBE_FCTRL_MPE);
1934         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1935
1936         if (ifp->if_flags & IFF_PROMISC) {
1937                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1938                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1939         } else if (ifp->if_flags & IFF_ALLMULTI) {
1940                 reg_rctl |= IXGBE_FCTRL_MPE;
1941                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1942                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1943         }
1944         return;
1945 }
1946
1947
1948 /*********************************************************************
1949  *  Multicast Update
1950  *
1951  *  This routine is called whenever the multicast address list is updated.
1952  *
1953  **********************************************************************/
1954 #define IXGBE_RAR_ENTRIES 16
1955
1956 static void
1957 ixgbe_set_multi(struct adapter *adapter)
1958 {
1959         u32     fctrl;
1960         u8      *mta;
1961         u8      *update_ptr;
1962         struct  ifmultiaddr *ifma;
1963         int     mcnt = 0;
1964         struct ifnet   *ifp = adapter->ifp;
1965
1966         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1967
1968         mta = adapter->mta;
1969         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1970             MAX_NUM_MULTICAST_ADDRESSES);
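             /*
             ** mta is a flat array of 6-byte (IXGBE_ETH_LENGTH_OF_ADDRESS)
             ** Ethernet addresses; ixgbe_mc_array_itr() later walks it one
             ** entry at a time on behalf of the shared code.
             */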
1971
1972 #if __FreeBSD_version < 800000
1973         IF_ADDR_LOCK(ifp);
1974 #else
1975         if_maddr_rlock(ifp);
1976 #endif
1977         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1978                 if (ifma->ifma_addr->sa_family != AF_LINK)
1979                         continue;
1980                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1981                         break;
1982                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1983                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1984                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1985                 mcnt++;
1986         }
1987 #if __FreeBSD_version < 800000
1988         IF_ADDR_UNLOCK(ifp);
1989 #else
1990         if_maddr_runlock(ifp);
1991 #endif
1992
1993         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1995         if (ifp->if_flags & IFF_PROMISC)
1996                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1997         else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
1998             ifp->if_flags & IFF_ALLMULTI) {
1999                 fctrl |= IXGBE_FCTRL_MPE;
2000                 fctrl &= ~IXGBE_FCTRL_UPE;
2001         } else
2002                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2003         
2004         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2005
2006         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2007                 update_ptr = mta;
2008                 ixgbe_update_mc_addr_list(&adapter->hw,
2009                     update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2010         }
2011
2012         return;
2013 }
2014
2015 /*
2016  * This is an iterator function needed by the multicast
2017  * shared code. It simply feeds the shared code routine the
2018  * addresses from the mta array built in ixgbe_set_multi(), one by one.
2019  */
2020 static u8 *
2021 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2022 {
2023         u8 *addr = *update_ptr;
2024         u8 *newptr;
2025         *vmdq = 0;
2026
2027         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2028         *update_ptr = newptr;
2029         return addr;
2030 }
2031
2032
2033 /*********************************************************************
2034  *  Timer routine
2035  *
2036  *  This routine checks for link status, updates statistics,
2037  *  and runs the watchdog check.
2038  *
2039  **********************************************************************/
2040
2041 static void
2042 ixgbe_local_timer(void *arg)
2043 {
2044         struct adapter  *adapter = arg;
2045         device_t        dev = adapter->dev;
2046         struct ix_queue *que = adapter->queues;
2047         struct tx_ring  *txr = adapter->tx_rings;
2048         int             hung = 0, paused = 0;
2049
2050         mtx_assert(&adapter->core_mtx, MA_OWNED);
2051
2052         /* Check for pluggable optics */
2053         if (adapter->sfp_probe)
2054                 if (!ixgbe_sfp_probe(adapter))
2055                         goto out; /* Nothing to do */
2056
2057         ixgbe_update_link_status(adapter);
2058         ixgbe_update_stats_counters(adapter);
2059
2060         /*
2061          * If the interface has been paused
2062          * then don't do the watchdog check
2063          */
2064         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2065                 paused = 1;
2066
2067         /*
2068         ** Check the TX queues status
2069         **      - watchdog only if all queues show hung
2070         */          
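             /*
             ** A queue still marked WORKING gets its TX task requeued
             ** so it can make progress; only when every queue reports
             ** hung, and flow control is not pausing us, is the reset
             ** path below taken.
             */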
2071         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2072                 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2073                     (paused == 0))
2074                         ++hung;
2075                 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2076                         taskqueue_enqueue(que->tq, &txr->txq_task);
2077         }
2078         /* Only truly watchdog if all queues show hung */
2079         if (hung == adapter->num_queues)
2080                 goto watchdog;
2081
2082 out:
2083         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2084         return;
2085
2086 watchdog:
2087         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2088         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2089             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2090             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2091         device_printf(dev,"TX(%d) desc avail = %d,"
2092             "Next TX to Clean = %d\n",
2093             txr->me, txr->tx_avail, txr->next_to_clean);
2094         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2095         adapter->watchdog_events++;
2096         ixgbe_init_locked(adapter);
2097 }
2098
2099 /*
2100 ** Note: this routine updates the OS on the link state
2101 **      the real check of the hardware only happens with
2102 **      a link interrupt.
2103 */
2104 static void
2105 ixgbe_update_link_status(struct adapter *adapter)
2106 {
2107         struct ifnet    *ifp = adapter->ifp;
2108         device_t dev = adapter->dev;
2109
2110
2111         if (adapter->link_up) {
2112                 if (adapter->link_active == FALSE) {
2113                         if (bootverbose)
2114                                 device_printf(dev,"Link is up %d Gbps %s\n",
2115                                     ((adapter->link_speed == 128) ? 10 : 1),
2116                                     "Full Duplex");
2117                         adapter->link_active = TRUE;
2118                         /* Update any Flow Control changes */
2119                         ixgbe_fc_enable(&adapter->hw);
2120                         if_link_state_change(ifp, LINK_STATE_UP);
2121                 }
2122         } else { /* Link down */
2123                 if (adapter->link_active == TRUE) {
2124                         if (bootverbose)
2125                                 device_printf(dev,"Link is Down\n");
2126                         if_link_state_change(ifp, LINK_STATE_DOWN);
2127                         adapter->link_active = FALSE;
2128                 }
2129         }
2130
2131         return;
2132 }
2133
2134
2135 /*********************************************************************
2136  *
2137  *  This routine disables all traffic on the adapter by issuing a
2138  *  global reset on the MAC and deallocates TX/RX buffers.
2139  *
2140  **********************************************************************/
2141
2142 static void
2143 ixgbe_stop(void *arg)
2144 {
2145         struct ifnet   *ifp;
2146         struct adapter *adapter = arg;
2147         struct ixgbe_hw *hw = &adapter->hw;
2148         ifp = adapter->ifp;
2149
2150         mtx_assert(&adapter->core_mtx, MA_OWNED);
2151
2152         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2153         ixgbe_disable_intr(adapter);
2154         callout_stop(&adapter->timer);
2155
2156         /* Let the stack know...*/
2157         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2158
2159         ixgbe_reset_hw(hw);
2160         hw->adapter_stopped = FALSE;
2161         ixgbe_stop_adapter(hw);
2162         if (hw->mac.type == ixgbe_mac_82599EB)
2163                 ixgbe_stop_mac_link_on_d3_82599(hw);
2164         /* Turn off the laser - noop with no optics */
2165         ixgbe_disable_tx_laser(hw);
2166
2167         /* Update the stack */
2168         adapter->link_up = FALSE;
2169         ixgbe_update_link_status(adapter);
2170
2171         /* reprogram the RAR[0] in case user changed it. */
2172         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2173
2174         return;
2175 }
2176
2177
2178 /*********************************************************************
2179  *
2180  *  Determine hardware revision.
2181  *
2182  **********************************************************************/
2183 static void
2184 ixgbe_identify_hardware(struct adapter *adapter)
2185 {
2186         device_t        dev = adapter->dev;
2187         struct ixgbe_hw *hw = &adapter->hw;
2188
2189         /* Save off the information about this board */
2190         hw->vendor_id = pci_get_vendor(dev);
2191         hw->device_id = pci_get_device(dev);
2192         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2193         hw->subsystem_vendor_id =
2194             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2195         hw->subsystem_device_id =
2196             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2197
2198         /* We need this here to set the num_segs below */
2199         ixgbe_set_mac_type(hw);
2200
2201         /* Pick up the 82599 and VF settings */
2202         if (hw->mac.type != ixgbe_mac_82598EB) {
2203                 hw->phy.smart_speed = ixgbe_smart_speed;
2204                 adapter->num_segs = IXGBE_82599_SCATTER;
2205         } else
2206                 adapter->num_segs = IXGBE_82598_SCATTER;
2207
2208         return;
2209 }
2210
2211 /*********************************************************************
2212  *
2213  *  Determine optic type
2214  *
2215  **********************************************************************/
2216 static void
2217 ixgbe_setup_optics(struct adapter *adapter)
2218 {
2219         struct ixgbe_hw *hw = &adapter->hw;
2220         int             layer;
2221
2222         layer = ixgbe_get_supported_physical_layer(hw);
2223
2224         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2225                 adapter->optics = IFM_10G_T;
2226                 return;
2227         }
2228
2229         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2230                 adapter->optics = IFM_1000_T;
2231                 return;
2232         }
2233
2234         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2235                 adapter->optics = IFM_1000_SX;
2236                 return;
2237         }
2238
2239         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2240             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2241                 adapter->optics = IFM_10G_LR;
2242                 return;
2243         }
2244
2245         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2246                 adapter->optics = IFM_10G_SR;
2247                 return;
2248         }
2249
2250         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2251                 adapter->optics = IFM_10G_TWINAX;
2252                 return;
2253         }
2254
2255         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2256             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2257                 adapter->optics = IFM_10G_CX4;
2258                 return;
2259         }
2260
2261         /* If we get here just set the default */
2262         adapter->optics = IFM_ETHER | IFM_AUTO;
2263         return;
2264 }
2265
2266 /*********************************************************************
2267  *
2268  *  Setup the Legacy or MSI Interrupt handler
2269  *
2270  **********************************************************************/
2271 static int
2272 ixgbe_allocate_legacy(struct adapter *adapter)
2273 {
2274         device_t        dev = adapter->dev;
2275         struct          ix_queue *que = adapter->queues;
2276 #ifndef IXGBE_LEGACY_TX
2277         struct tx_ring          *txr = adapter->tx_rings;
2278 #endif
2279         int             error, rid = 0;
2280
2281         /* MSI RID at 1 */
2282         if (adapter->msix == 1)
2283                 rid = 1;
2284
2285         /* We allocate a single interrupt resource */
2286         adapter->res = bus_alloc_resource_any(dev,
2287             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2288         if (adapter->res == NULL) {
2289                 device_printf(dev, "Unable to allocate bus resource: "
2290                     "interrupt\n");
2291                 return (ENXIO);
2292         }
2293
2294         /*
2295          * Try allocating a fast interrupt and the associated deferred
2296          * processing contexts.
2297          */
2298 #ifndef IXGBE_LEGACY_TX
2299         TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2300 #endif
2301         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2302         que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2303             taskqueue_thread_enqueue, &que->tq);
2304         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
2305             device_get_nameunit(adapter->dev));
2306
2307         /* Tasklets for Link, SFP and Multispeed Fiber */
2308         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2309         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2310         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2311 #ifdef IXGBE_FDIR
2312         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2313 #endif
2314         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2315             taskqueue_thread_enqueue, &adapter->tq);
2316         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2317             device_get_nameunit(adapter->dev));
2318
2319         if ((error = bus_setup_intr(dev, adapter->res,
2320             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
2321             que, &adapter->tag)) != 0) {
2322                 device_printf(dev, "Failed to register fast interrupt "
2323                     "handler: %d\n", error);
2324                 taskqueue_free(que->tq);
2325                 taskqueue_free(adapter->tq);
2326                 que->tq = NULL;
2327                 adapter->tq = NULL;
2328                 return (error);
2329         }
2330         /* For simplicity in the handlers */
2331         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2332
2333         return (0);
2334 }
2335
2336
2337 /*********************************************************************
2338  *
2339  *  Setup MSIX Interrupt resources and handlers 
2340  *
2341  **********************************************************************/
2342 static int
2343 ixgbe_allocate_msix(struct adapter *adapter)
2344 {
2345         device_t        dev = adapter->dev;
2346         struct          ix_queue *que = adapter->queues;
2347         struct          tx_ring *txr = adapter->tx_rings;
2348         int             error, rid, vector = 0;
2349
2350         for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2351                 rid = vector + 1;
2352                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2353                     RF_SHAREABLE | RF_ACTIVE);
2354                 if (que->res == NULL) {
2355                         device_printf(dev,"Unable to allocate"
2356                             " bus resource: que interrupt [%d]\n", vector);
2357                         return (ENXIO);
2358                 }
2359                 /* Set the handler function */
2360                 error = bus_setup_intr(dev, que->res,
2361                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2362                     ixgbe_msix_que, que, &que->tag);
2363                 if (error) {
2364                         que->res = NULL;
2365                         device_printf(dev, "Failed to register QUE handler");
2366                         return (error);
2367                 }
2368 #if __FreeBSD_version >= 800504
2369                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2370 #endif
2371                 que->msix = vector;
2372                 adapter->que_mask |= ((u64)1 << que->msix);
2373                 /*
2374                 ** Bind the msix vector, and thus the
2375                 ** ring, to the corresponding cpu.
2376                 */
2377                 if (adapter->num_queues > 1)
2378                         bus_bind_intr(dev, que->res, i);
2379
2380 #ifndef IXGBE_LEGACY_TX
2381                 TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2382 #endif
2383                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2384                 que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2385                     taskqueue_thread_enqueue, &que->tq);
2386                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2387                     device_get_nameunit(adapter->dev));
2388         }
2389
2390         /* and Link */
2391         rid = vector + 1;
2392         adapter->res = bus_alloc_resource_any(dev,
2393             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2394         if (!adapter->res) {
2395                 device_printf(dev,"Unable to allocate"
2396             " bus resource: Link interrupt [%d]\n", rid);
2397                 return (ENXIO);
2398         }
2399         /* Set the link handler function */
2400         error = bus_setup_intr(dev, adapter->res,
2401             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2402             ixgbe_msix_link, adapter, &adapter->tag);
2403         if (error) {
2404                 adapter->res = NULL;
2405                 device_printf(dev, "Failed to register LINK handler");
2406                 return (error);
2407         }
2408 #if __FreeBSD_version >= 800504
2409         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2410 #endif
2411         adapter->linkvec = vector;
2412         /* Tasklets for Link, SFP and Multispeed Fiber */
2413         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2414         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2415         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2416 #ifdef IXGBE_FDIR
2417         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2418 #endif
2419         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2420             taskqueue_thread_enqueue, &adapter->tq);
2421         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2422             device_get_nameunit(adapter->dev));
2423
2424         return (0);
2425 }
2426
2427 /*
2428  * Set up either MSI/X or MSI
2429  */
2430 static int
2431 ixgbe_setup_msix(struct adapter *adapter)
2432 {
2433         device_t dev = adapter->dev;
2434         int rid, want, queues, msgs;
2435
2436         /* Override by tuneable */
2437         if (ixgbe_enable_msix == 0)
2438                 goto msi;
2439
2440         /* First try MSI/X */
2441         msgs = pci_msix_count(dev); 
2442         if (msgs == 0)
2443                 goto msi;
2444         rid = PCIR_BAR(MSIX_82598_BAR);
2445         adapter->msix_mem = bus_alloc_resource_any(dev,
2446             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2447         if (adapter->msix_mem == NULL) {
2448                 rid += 4;       /* 82599 maps in higher BAR */
2449                 adapter->msix_mem = bus_alloc_resource_any(dev,
2450                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2451         }
2452         if (adapter->msix_mem == NULL) {
2453                 /* May not be enabled */
2454                 device_printf(adapter->dev,
2455                     "Unable to map MSIX table \n");
2456                 goto msi;
2457         }
2458
2459         /* Figure out a reasonable auto config value */
2460         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2461
2462         if (ixgbe_num_queues != 0)
2463                 queues = ixgbe_num_queues;
2464         /* Set max queues to 8 when autoconfiguring */
2465         else if ((ixgbe_num_queues == 0) && (queues > 8))
2466                 queues = 8;
2467
2468         /* reflect correct sysctl value */
2469         ixgbe_num_queues = queues;
2470
2471         /*
2472         ** Want one vector (RX/TX pair) per queue
2473         ** plus an additional for Link.
2474         */
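             /*
             ** For example, an 8 queue auto-configuration asks for 9
             ** vectors: eight RX/TX pairs plus one for link.
             */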
2475         want = queues + 1;
2476         if (msgs >= want)
2477                 msgs = want;
2478         else {
2479                 device_printf(adapter->dev,
2480                     "MSIX Configuration Problem, "
2481                     "%d vectors but %d queues wanted!\n",
2482                     msgs, want);
2483                 goto msi;
2484         }
2485         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2486                 device_printf(adapter->dev,
2487                     "Using MSIX interrupts with %d vectors\n", msgs);
2488                 adapter->num_queues = queues;
2489                 return (msgs);
2490         }
2491         /*
2492         ** If MSIX alloc failed or provided us with
2493         ** less than needed, free and fall through to MSI
2494         */
2495         pci_release_msi(dev);
2496
2497 msi:
2498         if (adapter->msix_mem != NULL) {
2499                 bus_release_resource(dev, SYS_RES_MEMORY,
2500                     rid, adapter->msix_mem);
2501                 adapter->msix_mem = NULL;
2502         }
2503         msgs = 1;
2504         if (pci_alloc_msi(dev, &msgs) == 0) {
2505                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2506                 return (msgs);
2507         }
2508         device_printf(adapter->dev,"Using a Legacy interrupt\n");
2509         return (0);
2510 }
2511
2512
2513 static int
2514 ixgbe_allocate_pci_resources(struct adapter *adapter)
2515 {
2516         int             rid;
2517         device_t        dev = adapter->dev;
2518
2519         rid = PCIR_BAR(0);
2520         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2521             &rid, RF_ACTIVE);
2522
2523         if (!(adapter->pci_mem)) {
2524                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2525                 return (ENXIO);
2526         }
2527
2528         adapter->osdep.mem_bus_space_tag =
2529                 rman_get_bustag(adapter->pci_mem);
2530         adapter->osdep.mem_bus_space_handle =
2531                 rman_get_bushandle(adapter->pci_mem);
2532         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2533
2534         /* Legacy defaults */
2535         adapter->num_queues = 1;
2536         adapter->hw.back = &adapter->osdep;
2537
2538         /*
2539         ** Now set up MSI or MSI/X, which should
2540         ** return the number of supported
2541         ** vectors. (Will be 1 for MSI)
2542         */
2543         adapter->msix = ixgbe_setup_msix(adapter);
2544         return (0);
2545 }
2546
2547 static void
2548 ixgbe_free_pci_resources(struct adapter * adapter)
2549 {
2550         struct          ix_queue *que = adapter->queues;
2551         device_t        dev = adapter->dev;
2552         int             rid, memrid;
2553
2554         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2555                 memrid = PCIR_BAR(MSIX_82598_BAR);
2556         else
2557                 memrid = PCIR_BAR(MSIX_82599_BAR);
2558
2559         /*
2560         ** There is a slight possibility of a failure mode
2561         ** in attach that will result in entering this function
2562         ** before interrupt resources have been initialized, and
2563         ** in that case we do not want to execute the loops below.
2564         ** We can detect this reliably by the state of the adapter's
2565         ** res pointer.
2566         */
2567         if (adapter->res == NULL)
2568                 goto mem;
2569
2570         /*
2571         **  Release all msix queue resources:
2572         */
2573         for (int i = 0; i < adapter->num_queues; i++, que++) {
2574                 rid = que->msix + 1;
2575                 if (que->tag != NULL) {
2576                         bus_teardown_intr(dev, que->res, que->tag);
2577                         que->tag = NULL;
2578                 }
2579                 if (que->res != NULL)
2580                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2581         }
2582
2583
2584         /* Clean the Legacy or Link interrupt last */
2585         if (adapter->linkvec) /* we are doing MSIX */
2586                 rid = adapter->linkvec + 1;
2587         else
2588                 rid = (adapter->msix != 0) ? 1 : 0;
2589
2590         if (adapter->tag != NULL) {
2591                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2592                 adapter->tag = NULL;
2593         }
2594         if (adapter->res != NULL)
2595                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2596
2597 mem:
2598         if (adapter->msix)
2599                 pci_release_msi(dev);
2600
2601         if (adapter->msix_mem != NULL)
2602                 bus_release_resource(dev, SYS_RES_MEMORY,
2603                     memrid, adapter->msix_mem);
2604
2605         if (adapter->pci_mem != NULL)
2606                 bus_release_resource(dev, SYS_RES_MEMORY,
2607                     PCIR_BAR(0), adapter->pci_mem);
2608
2609         return;
2610 }
2611
2612 /*********************************************************************
2613  *
2614  *  Setup networking device structure and register an interface.
2615  *
2616  **********************************************************************/
2617 static int
2618 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2619 {
2620         struct ixgbe_hw *hw = &adapter->hw;
2621         struct ifnet   *ifp;
2622
2623         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2624
2625         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2626         if (ifp == NULL) {
2627                 device_printf(dev, "can not allocate ifnet structure\n");
2628                 return (-1);
2629         }
2630         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2631 #if __FreeBSD_version < 1000025
2632         ifp->if_baudrate = 1000000000;
2633 #else
2634         if_initbaudrate(ifp, IF_Gbps(10));
2635 #endif
2636         ifp->if_init = ixgbe_init;
2637         ifp->if_softc = adapter;
2638         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2639         ifp->if_ioctl = ixgbe_ioctl;
2640 #ifndef IXGBE_LEGACY_TX
2641         ifp->if_transmit = ixgbe_mq_start;
2642         ifp->if_qflush = ixgbe_qflush;
2643 #else
2644         ifp->if_start = ixgbe_start;
2645         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2646         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2647         IFQ_SET_READY(&ifp->if_snd);
2648 #endif
2649
2650         ether_ifattach(ifp, adapter->hw.mac.addr);
2651
2652         adapter->max_frame_size =
2653             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2654
2655         /*
2656          * Tell the upper layer(s) we support long frames.
2657          */
2658         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2659
2660         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2661         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2662         ifp->if_capabilities |= IFCAP_LRO;
2663         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2664                              |  IFCAP_VLAN_HWTSO
2665                              |  IFCAP_VLAN_MTU
2666                              |  IFCAP_HWSTATS;
2667         ifp->if_capenable = ifp->if_capabilities;
2668
2669         /*
2670         ** Don't turn this on by default; if vlans are
2671         ** created on another pseudo device (e.g. lagg),
2672         ** vlan events are not passed through, breaking
2673         ** operation, but with HW FILTER off it works. If
2674         ** using vlans directly on the ixgbe driver you can
2675         ** enable this and get full hardware tag filtering.
2676         */
2677         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2678
2679         /*
2680          * Specify the media types supported by this adapter and register
2681          * callbacks to update media and link information
2682          */
2683         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2684                      ixgbe_media_status);
2685         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2686         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2687         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2688                 ifmedia_add(&adapter->media,
2689                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2690                 ifmedia_add(&adapter->media,
2691                     IFM_ETHER | IFM_1000_T, 0, NULL);
2692         }
2693         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2694         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2695
2696         return (0);
2697 }
2698
2699 static void
2700 ixgbe_config_link(struct adapter *adapter)
2701 {
2702         struct ixgbe_hw *hw = &adapter->hw;
2703         u32     autoneg, err = 0;
2704         bool    sfp, negotiate;
2705
2706         sfp = ixgbe_is_sfp(hw);
2707
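             /*
             ** Pluggable optics are brought up asynchronously via the
             ** mod/msf tasklets; fixed PHYs negotiate inline below.
             */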
2708         if (sfp) { 
2709                 if (hw->phy.multispeed_fiber) {
2710                         hw->mac.ops.setup_sfp(hw);
2711                         ixgbe_enable_tx_laser(hw);
2712                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2713                 } else
2714                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2715         } else {
2716                 if (hw->mac.ops.check_link)
2717                         err = ixgbe_check_link(hw, &adapter->link_speed,
2718                             &adapter->link_up, FALSE);
2719                 if (err)
2720                         goto out;
2721                 autoneg = hw->phy.autoneg_advertised;
2722                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2723                         err  = hw->mac.ops.get_link_capabilities(hw,
2724                             &autoneg, &negotiate);
2725                 if (err)
2726                         goto out;
2727                 if (hw->mac.ops.setup_link)
2728                         err = hw->mac.ops.setup_link(hw,
2729                             autoneg, adapter->link_up);
2730         }
2731 out:
2732         return;
2733 }
2734
2735 /********************************************************************
2736  * Manage DMA'able memory.
2737  *******************************************************************/
2738 static void
2739 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2740 {
2741         if (error)
2742                 return;
2743         *(bus_addr_t *) arg = segs->ds_addr;
2744         return;
2745 }
2746
2747 static int
2748 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2749                 struct ixgbe_dma_alloc *dma, int mapflags)
2750 {
2751         device_t dev = adapter->dev;
2752         int             r;
2753
2754         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2755                                DBA_ALIGN, 0,    /* alignment, bounds */
2756                                BUS_SPACE_MAXADDR,       /* lowaddr */
2757                                BUS_SPACE_MAXADDR,       /* highaddr */
2758                                NULL, NULL,      /* filter, filterarg */
2759                                size,    /* maxsize */
2760                                1,       /* nsegments */
2761                                size,    /* maxsegsize */
2762                                BUS_DMA_ALLOCNOW,        /* flags */
2763                                NULL,    /* lockfunc */
2764                                NULL,    /* lockfuncarg */
2765                                &dma->dma_tag);
2766         if (r != 0) {
2767                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2768                        "error %u\n", r);
2769                 goto fail_0;
2770         }
2771         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2772                              BUS_DMA_NOWAIT, &dma->dma_map);
2773         if (r != 0) {
2774                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2775                        "error %u\n", r);
2776                 goto fail_1;
2777         }
2778         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2779                             size,
2780                             ixgbe_dmamap_cb,
2781                             &dma->dma_paddr,
2782                             mapflags | BUS_DMA_NOWAIT);
2783         if (r != 0) {
2784                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2785                        "error %u\n", r);
2786                 goto fail_2;
2787         }
2788         dma->dma_size = size;
2789         return (0);
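     /* The failure labels below unwind in reverse order of acquisition */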
2790 fail_2:
2791         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2792 fail_1:
2793         bus_dma_tag_destroy(dma->dma_tag);
2794 fail_0:
2795         dma->dma_map = NULL;
2796         dma->dma_tag = NULL;
2797         return (r);
2798 }
2799
2800 static void
2801 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2802 {
2803         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2804             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2805         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2806         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2807         bus_dma_tag_destroy(dma->dma_tag);
2808 }
2809
2810
2811 /*********************************************************************
2812  *
2813  *  Allocate memory for the transmit and receive rings, and then
2814  *  the descriptors associated with each, called only once at attach.
2815  *
2816  **********************************************************************/
2817 static int
2818 ixgbe_allocate_queues(struct adapter *adapter)
2819 {
2820         device_t        dev = adapter->dev;
2821         struct ix_queue *que;
2822         struct tx_ring  *txr;
2823         struct rx_ring  *rxr;
2824         int rsize, tsize, error = IXGBE_SUCCESS;
2825         int txconf = 0, rxconf = 0;
2826
2827         /* First allocate the top level queue structs */
2828         if (!(adapter->queues =
2829             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2830             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2831                 device_printf(dev, "Unable to allocate queue memory\n");
2832                 error = ENOMEM;
2833                 goto fail;
2834         }
2835
2836         /* First allocate the TX ring struct memory */
2837         if (!(adapter->tx_rings =
2838             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2839             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2840                 device_printf(dev, "Unable to allocate TX ring memory\n");
2841                 error = ENOMEM;
2842                 goto tx_fail;
2843         }
2844
2845         /* Next allocate the RX */
2846         if (!(adapter->rx_rings =
2847             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2848             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2849                 device_printf(dev, "Unable to allocate RX ring memory\n");
2850                 error = ENOMEM;
2851                 goto rx_fail;
2852         }
2853
2854         /* For the ring itself */
2855         tsize = roundup2(adapter->num_tx_desc *
2856             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
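             /*
             ** tsize is rounded up to a multiple of DBA_ALIGN, matching
             ** the alignment given to the DMA tag in ixgbe_dma_malloc().
             */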
2857
2858         /*
2859          * Now set up the TX queues; txconf is needed to handle the
2860          * possibility that things fail midcourse and we need to
2861          * undo memory gracefully
2862          */ 
2863         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2864                 /* Set up some basics */
2865                 txr = &adapter->tx_rings[i];
2866                 txr->adapter = adapter;
2867                 txr->me = i;
2868                 txr->num_desc = adapter->num_tx_desc;
2869
2870                 /* Initialize the TX side lock */
2871                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2872                     device_get_nameunit(dev), txr->me);
2873                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2874
2875                 if (ixgbe_dma_malloc(adapter, tsize,
2876                         &txr->txdma, BUS_DMA_NOWAIT)) {
2877                         device_printf(dev,
2878                             "Unable to allocate TX Descriptor memory\n");
2879                         error = ENOMEM;
2880                         goto err_tx_desc;
2881                 }
2882                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2883                 bzero((void *)txr->tx_base, tsize);
2884
2885                 /* Now allocate transmit buffers for the ring */
2886                 if (ixgbe_allocate_transmit_buffers(txr)) {
2887                         device_printf(dev,
2888                             "Critical Failure setting up transmit buffers\n");
2889                         error = ENOMEM;
2890                         goto err_tx_desc;
2891                 }
2892 #ifndef IXGBE_LEGACY_TX
2893                 /* Allocate a buf ring */
2894                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2895                     M_WAITOK, &txr->tx_mtx);
2896                 if (txr->br == NULL) {
2897                         device_printf(dev,
2898                             "Critical Failure setting up buf ring\n");
2899                         error = ENOMEM;
2900                         goto err_tx_desc;
2901                 }
2902 #endif
2903         }
2904
2905         /*
2906          * Next the RX queues...
2907          */ 
2908         rsize = roundup2(adapter->num_rx_desc *
2909             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2910         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2911                 rxr = &adapter->rx_rings[i];
2912                 /* Set up some basics */
2913                 rxr->adapter = adapter;
2914                 rxr->me = i;
2915                 rxr->num_desc = adapter->num_rx_desc;
2916
2917                 /* Initialize the RX side lock */
2918                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2919                     device_get_nameunit(dev), rxr->me);
2920                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2921
2922                 if (ixgbe_dma_malloc(adapter, rsize,
2923                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2924                         device_printf(dev,
2925                             "Unable to allocate RX Descriptor memory\n");
2926                         error = ENOMEM;
2927                         goto err_rx_desc;
2928                 }
2929                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2930                 bzero((void *)rxr->rx_base, rsize);
2931
2932                 /* Allocate receive buffers for the ring */
2933                 if (ixgbe_allocate_receive_buffers(rxr)) {
2934                         device_printf(dev,
2935                             "Critical Failure setting up receive buffers\n");
2936                         error = ENOMEM;
2937                         goto err_rx_desc;
2938                 }
2939         }
2940
2941         /*
2942         ** Finally set up the queue holding structs
2943         */
2944         for (int i = 0; i < adapter->num_queues; i++) {
2945                 que = &adapter->queues[i];
2946                 que->adapter = adapter;
2947                 que->txr = &adapter->tx_rings[i];
2948                 que->rxr = &adapter->rx_rings[i];
2949         }
2950
2951         return (0);
2952
2953 err_rx_desc:
2954         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2955                 ixgbe_dma_free(adapter, &rxr->rxdma);
2956 err_tx_desc:
2957         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2958                 ixgbe_dma_free(adapter, &txr->txdma);
2959         free(adapter->rx_rings, M_DEVBUF);
2960 rx_fail:
2961         free(adapter->tx_rings, M_DEVBUF);
2962 tx_fail:
2963         free(adapter->queues, M_DEVBUF);
2964 fail:
2965         return (error);
2966 }
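/*
 * Note on the error path above: the txconf/rxconf counters implement the
 * usual "count what you built, unwind exactly that many" idiom.  A minimal
 * standalone sketch of the same pattern; build_one() and destroy_one() are
 * hypothetical names, not functions in this driver.
 */
#if 0
static int
example_build_all(int n)
{
        int built = 0;

        for (int i = 0; i < n; i++, built++) {
                if (build_one(i) != 0)
                        goto unwind;
        }
        return (0);
unwind:
        while (built-- > 0)     /* tear down only what was built */
                destroy_one(built);
        return (ENOMEM);
}
#endif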
2967
2968 /*********************************************************************
2969  *
2970  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2971  *  the information needed to transmit a packet on the wire. This is
2972  *  called only once at attach; setup is done on every reset.
2973  *
2974  **********************************************************************/
2975 static int
2976 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2977 {
2978         struct adapter *adapter = txr->adapter;
2979         device_t dev = adapter->dev;
2980         struct ixgbe_tx_buf *txbuf;
2981         int error, i;
2982
2983         /*
2984          * Setup DMA descriptor areas.
2985          */
2986         if ((error = bus_dma_tag_create(
2987                                bus_get_dma_tag(adapter->dev),   /* parent */
2988                                1, 0,            /* alignment, bounds */
2989                                BUS_SPACE_MAXADDR,       /* lowaddr */
2990                                BUS_SPACE_MAXADDR,       /* highaddr */
2991                                NULL, NULL,              /* filter, filterarg */
2992                                IXGBE_TSO_SIZE,          /* maxsize */
2993                                adapter->num_segs,       /* nsegments */
2994                                PAGE_SIZE,               /* maxsegsize */
2995                                0,                       /* flags */
2996                                NULL,                    /* lockfunc */
2997                                NULL,                    /* lockfuncarg */
2998                                &txr->txtag))) {
2999                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3000                 goto fail;
3001         }
3002
3003         if (!(txr->tx_buffers =
3004             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3005             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3006                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3007                 error = ENOMEM;
3008                 goto fail;
3009         }
3010
3011         /* Create the descriptor buffer dma maps */
3012         txbuf = txr->tx_buffers;
3013         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3014                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3015                 if (error != 0) {
3016                         device_printf(dev, "Unable to create TX DMA map\n");
3017                         goto fail;
3018                 }
3019         }
3020
3021         return 0;
3022 fail:
3023         /* Free everything; this handles the case where we failed partway */
3024         ixgbe_free_transmit_structures(adapter);
3025         return (error);
3026 }
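/*
 * Note: one DMA tag describes the constraints for every transmit mapping
 * on this ring (at most IXGBE_TSO_SIZE bytes in up to num_segs segments
 * of at most PAGE_SIZE each), while the per-descriptor dmamaps created
 * above are the reusable handles that individual mbuf chains get loaded
 * into at transmit time.
 */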
3027
3028 /*********************************************************************
3029  *
3030  *  Initialize a transmit ring.
3031  *
3032  **********************************************************************/
3033 static void
3034 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3035 {
3036         struct adapter *adapter = txr->adapter;
3037         struct ixgbe_tx_buf *txbuf;
3038         int i;
3039 #ifdef DEV_NETMAP
3040         struct netmap_adapter *na = NA(adapter->ifp);
3041         struct netmap_slot *slot;
3042 #endif /* DEV_NETMAP */
3043
3044         /* Clear the old ring contents */
3045         IXGBE_TX_LOCK(txr);
3046 #ifdef DEV_NETMAP
3047         /*
3048          * (under lock): if in netmap mode, do some consistency
3049          * checks and set slot to entry 0 of the netmap ring.
3050          */
3051         slot = netmap_reset(na, NR_TX, txr->me, 0);
3052 #endif /* DEV_NETMAP */
3053         bzero((void *)txr->tx_base,
3054               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3055         /* Reset indices */
3056         txr->next_avail_desc = 0;
3057         txr->next_to_clean = 0;
3058
3059         /* Free any existing tx buffers. */
3060         txbuf = txr->tx_buffers;
3061         for (i = 0; i < txr->num_desc; i++, txbuf++) {
3062                 if (txbuf->m_head != NULL) {
3063                         bus_dmamap_sync(txr->txtag, txbuf->map,
3064                             BUS_DMASYNC_POSTWRITE);
3065                         bus_dmamap_unload(txr->txtag, txbuf->map);
3066                         m_freem(txbuf->m_head);
3067                         txbuf->m_head = NULL;
3068                 }
3069 #ifdef DEV_NETMAP
3070                 /*
3071                  * In netmap mode, set the map for the packet buffer.
3072                  * NOTE: Some drivers (not this one) also need to set
3073                  * the physical buffer address in the NIC ring.
3074                  * Slots in the netmap ring (indexed by "si") are
3075                  * kring->nkr_hwofs positions "ahead" wrt the
3076                  * corresponding slot in the NIC ring. In some drivers
3077                  * (not here) nkr_hwofs can be negative. Function
3078                  * netmap_idx_n2k() handles wraparounds properly.
3079                  */
3080                 if (slot) {
3081                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3082                         netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3083                 }
3084 #endif /* DEV_NETMAP */
3085                 /* Clear the EOP descriptor pointer */
3086                 txbuf->eop = NULL;
3087         }
3088
3089 #ifdef IXGBE_FDIR
3090         /* Set the rate at which we sample packets */
3091         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3092                 txr->atr_sample = atr_sample_rate;
3093 #endif
3094
3095         /* Set number of descriptors available */
3096         txr->tx_avail = adapter->num_tx_desc;
3097
3098         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3099             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3100         IXGBE_TX_UNLOCK(txr);
3101 }
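/*
 * Illustrative sketch of what netmap_idx_n2k() computes for the slot
 * lookup above: a conceptual model only, assuming 0 <= i < num_slots
 * (the real helper lives in netmap and also validates its inputs).
 */
#if 0
static int
example_idx_n2k(int i, int hwofs, int num_slots)
{
        int si = i + hwofs;     /* netmap slots sit hwofs "ahead" of NIC slots */

        if (si < 0)
                si += num_slots;
        else if (si >= num_slots)
                si -= num_slots;
        return (si);
}
#endif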
3102
3103 /*********************************************************************
3104  *
3105  *  Initialize all transmit rings.
3106  *
3107  **********************************************************************/
3108 static int
3109 ixgbe_setup_transmit_structures(struct adapter *adapter)
3110 {
3111         struct tx_ring *txr = adapter->tx_rings;
3112
3113         for (int i = 0; i < adapter->num_queues; i++, txr++)
3114                 ixgbe_setup_transmit_ring(txr);
3115
3116         return (0);
3117 }
3118
3119 /*********************************************************************
3120  *
3121  *  Enable transmit unit.
3122  *
3123  **********************************************************************/
3124 static void
3125 ixgbe_initialize_transmit_units(struct adapter *adapter)
3126 {
3127         struct tx_ring  *txr = adapter->tx_rings;
3128         struct ixgbe_hw *hw = &adapter->hw;
3129
3130         /* Setup the Base and Length of the Tx Descriptor Ring */
3131
3132         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3133                 u64     tdba = txr->txdma.dma_paddr;
3134                 u32     txctrl;
3135
3136                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3137                        (tdba & 0x00000000ffffffffULL));
3138                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3139                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3140                     adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3141
3142                 /* Setup the HW Tx Head and Tail descriptor pointers */
3143                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3144                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3145
3146                 /* Setup Transmit Descriptor Cmd Settings */
3147                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3148                 txr->queue_status = IXGBE_QUEUE_IDLE;
3149
3150                 /* Set the processing limit */
3151                 txr->process_limit = ixgbe_tx_process_limit;
3152
3153                 /* Disable Head Writeback */
3154                 switch (hw->mac.type) {
3155                 case ixgbe_mac_82598EB:
3156                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3157                         break;
3158                 case ixgbe_mac_82599EB:
3159                 case ixgbe_mac_X540:
3160                 default:
3161                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3162                         break;
3163                 }
3164                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3165                 switch (hw->mac.type) {
3166                 case ixgbe_mac_82598EB:
3167                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3168                         break;
3169                 case ixgbe_mac_82599EB:
3170                 case ixgbe_mac_X540:
3171                 default:
3172                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3173                         break;
3174                 }
3175
3176         }
3177
3178         if (hw->mac.type != ixgbe_mac_82598EB) {
3179                 u32 dmatxctl, rttdcs;
3180                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3181                 dmatxctl |= IXGBE_DMATXCTL_TE;
3182                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3183                 /* Disable arbiter to set MTQC */
3184                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3185                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3186                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3187                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3188                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3189                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3190         }
3191
3192         return;
3193 }
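/*
 * Illustrative sketch: the TDBAL/TDBAH writes above split one 64-bit bus
 * address across two 32-bit registers; the same pattern recurs for the
 * RX ring (RDBAL/RDBAH).  Helper name is assumed, not driver code.
 */
#if 0
static void
example_split_base(u64 base, u32 *lo, u32 *hi)
{
        *lo = (u32)(base & 0x00000000ffffffffULL);      /* -> TDBAL */
        *hi = (u32)(base >> 32);                        /* -> TDBAH */
}
#endif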
3194
3195 /*********************************************************************
3196  *
3197  *  Free all transmit rings.
3198  *
3199  **********************************************************************/
3200 static void
3201 ixgbe_free_transmit_structures(struct adapter *adapter)
3202 {
3203         struct tx_ring *txr = adapter->tx_rings;
3204
3205         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3206                 IXGBE_TX_LOCK(txr);
3207                 ixgbe_free_transmit_buffers(txr);
3208                 ixgbe_dma_free(adapter, &txr->txdma);
3209                 IXGBE_TX_UNLOCK(txr);
3210                 IXGBE_TX_LOCK_DESTROY(txr);
3211         }
3212         free(adapter->tx_rings, M_DEVBUF);
3213 }
3214
3215 /*********************************************************************
3216  *
3217  *  Free transmit ring related data structures.
3218  *
3219  **********************************************************************/
3220 static void
3221 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3222 {
3223         struct adapter *adapter = txr->adapter;
3224         struct ixgbe_tx_buf *tx_buffer;
3225         int             i;
3226
3227         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
3228
3229         if (txr->tx_buffers == NULL)
3230                 return;
3231
3232         tx_buffer = txr->tx_buffers;
3233         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3234                 if (tx_buffer->m_head != NULL) {
3235                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3236                             BUS_DMASYNC_POSTWRITE);
3237                         bus_dmamap_unload(txr->txtag,
3238                             tx_buffer->map);
3239                         m_freem(tx_buffer->m_head);
3240                         tx_buffer->m_head = NULL;
3241                         if (tx_buffer->map != NULL) {
3242                                 bus_dmamap_destroy(txr->txtag,
3243                                     tx_buffer->map);
3244                                 tx_buffer->map = NULL;
3245                         }
3246                 } else if (tx_buffer->map != NULL) {
3247                         bus_dmamap_unload(txr->txtag,
3248                             tx_buffer->map);
3249                         bus_dmamap_destroy(txr->txtag,
3250                             tx_buffer->map);
3251                         tx_buffer->map = NULL;
3252                 }
3253         }
3254 #ifndef IXGBE_LEGACY_TX        /* the buf_ring exists only in the non-legacy path */
3255         if (txr->br != NULL)
3256                 buf_ring_free(txr->br, M_DEVBUF);
3257 #endif
3258         if (txr->tx_buffers != NULL) {
3259                 free(txr->tx_buffers, M_DEVBUF);
3260                 txr->tx_buffers = NULL;
3261         }
3262         if (txr->txtag != NULL) {
3263                 bus_dma_tag_destroy(txr->txtag);
3264                 txr->txtag = NULL;
3265         }
3266         return;
3267 }
3268
3269 /*********************************************************************
3270  *
3271  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3272  *
3273  **********************************************************************/
3274
3275 static int
3276 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3277     u32 *cmd_type_len, u32 *olinfo_status)
3278 {
3279         struct ixgbe_adv_tx_context_desc *TXD;
3280         struct ether_vlan_header *eh;
3281         struct ip *ip;
3282         struct ip6_hdr *ip6;
3283         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3284         int     ehdrlen, ip_hlen = 0;
3285         u16     etype;
3286         u8      ipproto = 0;
3287         int     offload = TRUE;
3288         int     ctxd = txr->next_avail_desc;
3289         u16     vtag = 0;
3290
3291         /* First check if TSO is to be used */
3292         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3293                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3294
3295         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3296                 offload = FALSE;
3297
3298         /* Indicate the whole packet as payload when not doing TSO */
3299         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3300
3301         /* Now ready a context descriptor */
3302         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3303
3304         /*
3305         ** In advanced descriptors the vlan tag must 
3306         ** be placed into the context descriptor. Hence
3307         ** we need to make one even if not doing offloads.
3308         */
3309         if (mp->m_flags & M_VLANTAG) {
3310                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3311                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3312         } else if (offload == FALSE) /* ... no offload to do */
3313                 return (0);
3314
3315         /*
3316          * Determine where frame payload starts.
3317          * Jump over vlan headers if already present,
3318          * helpful for QinQ too.
3319          */
3320         eh = mtod(mp, struct ether_vlan_header *);
3321         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3322                 etype = ntohs(eh->evl_proto);
3323                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3324         } else {
3325                 etype = ntohs(eh->evl_encap_proto);
3326                 ehdrlen = ETHER_HDR_LEN;
3327         }
3328
3329         /* Set the ether header length */
3330         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3331
3332         switch (etype) {
3333                 case ETHERTYPE_IP:
3334                         ip = (struct ip *)(mp->m_data + ehdrlen);
3335                         ip_hlen = ip->ip_hl << 2;
3336                         ipproto = ip->ip_p;
3337                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3338                         break;
3339                 case ETHERTYPE_IPV6:
3340                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3341                         ip_hlen = sizeof(struct ip6_hdr);
3342                         /* XXX-BZ this will go badly in case of ext hdrs. */
3343                         ipproto = ip6->ip6_nxt;
3344                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3345                         break;
3346                 default:
3347                         offload = FALSE;
3348                         break;
3349         }
3350
3351         vlan_macip_lens |= ip_hlen;
3352         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3353
3354         switch (ipproto) {
3355                 case IPPROTO_TCP:
3356                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3357                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3358                         break;
3359
3360                 case IPPROTO_UDP:
3361                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3362                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3363                         break;
3364
3365 #if __FreeBSD_version >= 800000
3366                 case IPPROTO_SCTP:
3367                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3368                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3369                         break;
3370 #endif
3371                 default:
3372                         offload = FALSE;
3373                         break;
3374         }
3375
3376         if (offload) /* For the TX descriptor setup */
3377                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3378
3379         /* Now copy bits into descriptor */
3380         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3381         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3382         TXD->seqnum_seed = htole32(0);
3383         TXD->mss_l4len_idx = htole32(0);
3384
3385         /* We've consumed the first desc, adjust counters */
3386         if (++ctxd == txr->num_desc)
3387                 ctxd = 0;
3388         txr->next_avail_desc = ctxd;
3389         --txr->tx_avail;
3390
3391         return (0);
3392 }
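/*
 * Worked example of the vlan_macip_lens packing above: for an untagged
 * IPv4 frame with a 20-byte IP header,
 *
 *      ehdrlen         = ETHER_HDR_LEN (14)
 *      vlan_macip_lens = (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20
 *
 * i.e. the MAC header length sits above the IP header length in the low
 * bits, with any VLAN tag occupying the bits at IXGBE_ADVTXD_VLAN_SHIFT
 * and above.
 */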
3393
3394 /**********************************************************************
3395  *
3396  *  Setup work for hardware segmentation offload (TSO) on
3397  *  adapters using advanced tx descriptors
3398  *
3399  **********************************************************************/
3400 static int
3401 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3402     u32 *cmd_type_len, u32 *olinfo_status)
3403 {
3404         struct ixgbe_adv_tx_context_desc *TXD;
3405         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3406         u32 mss_l4len_idx = 0, paylen;
3407         u16 vtag = 0, eh_type;
3408         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3409         struct ether_vlan_header *eh;
3410 #ifdef INET6
3411         struct ip6_hdr *ip6;
3412 #endif
3413 #ifdef INET
3414         struct ip *ip;
3415 #endif
3416         struct tcphdr *th;
3417
3418
3419         /*
3420          * Determine where frame payload starts.
3421          * Jump over vlan headers if already present
3422          */
3423         eh = mtod(mp, struct ether_vlan_header *);
3424         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3425                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3426                 eh_type = eh->evl_proto;
3427         } else {
3428                 ehdrlen = ETHER_HDR_LEN;
3429                 eh_type = eh->evl_encap_proto;
3430         }
3431
3432         switch (ntohs(eh_type)) {
3433 #ifdef INET6
3434         case ETHERTYPE_IPV6:
3435                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3436                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3437                 if (ip6->ip6_nxt != IPPROTO_TCP)
3438                         return (ENXIO);
3439                 ip_hlen = sizeof(struct ip6_hdr);
3440
3441                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3442                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3443                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3444                 break;
3445 #endif
3446 #ifdef INET
3447         case ETHERTYPE_IP:
3448                 ip = (struct ip *)(mp->m_data + ehdrlen);
3449                 if (ip->ip_p != IPPROTO_TCP)
3450                         return (ENXIO);
3451                 ip->ip_sum = 0;
3452                 ip_hlen = ip->ip_hl << 2;
3453                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3454                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3455                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3456                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3457                 /* Tell transmit desc to also do IPv4 checksum. */
3458                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3459                 break;
3460 #endif
3461         default:
3462                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3463                     __func__, ntohs(eh_type));
3464                 break;
3465         }
3466
3467         ctxd = txr->next_avail_desc;
3468         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3469
3470         tcp_hlen = th->th_off << 2;
3471
3472         /* This is used in the transmit desc in encap */
3473         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3474
3475         /* VLAN MACLEN IPLEN */
3476         if (mp->m_flags & M_VLANTAG) {
3477                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3478                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3479         }
3480
3481         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3482         vlan_macip_lens |= ip_hlen;
3483         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3484
3485         /* ADV DTYPE TUCMD */
3486         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3487         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3488         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3489
3490         /* MSS L4LEN IDX */
3491         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3492         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3493         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3494
3495         TXD->seqnum_seed = htole32(0);
3496
3497         if (++ctxd == txr->num_desc)
3498                 ctxd = 0;
3499
3500         txr->tx_avail--;
3501         txr->next_avail_desc = ctxd;
3502         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3503         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3504         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3505         ++txr->tso_tx;
3506         return (0);
3507 }
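/*
 * Worked example for the paylen computation above: a 9014-byte TSO
 * packet with ehdrlen 14, ip_hlen 20 and tcp_hlen 20 reports
 * 9014 - 14 - 20 - 20 = 8960 bytes of TCP payload to the hardware,
 * which the NIC then splits into tso_segsz-byte segments, replicating
 * the prototype headers and fixing up the seeded checksums per segment.
 */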
3508
3509 #ifdef IXGBE_FDIR
3510 /*
3511 ** This routine parses packet headers so that Flow
3512 ** Director can make a hashed filter table entry
3513 ** allowing traffic flows to be identified and kept
3514 ** on the same CPU.  Doing this for every packet would
3515 ** be a performance hit, so only one in IXGBE_FDIR_RATE
3516 ** packets is sampled.
3517 */
3518 static void
3519 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3520 {
3521         struct adapter                  *adapter = txr->adapter;
3522         struct ix_queue                 *que;
3523         struct ip                       *ip;
3524         struct tcphdr                   *th;
3525         struct udphdr                   *uh;
3526         struct ether_vlan_header        *eh;
3527         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3528         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3529         int                             ehdrlen, ip_hlen;
3530         u16                             etype;
3531
3532         eh = mtod(mp, struct ether_vlan_header *);
3533         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3534                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3535                 etype = eh->evl_proto;
3536         } else {
3537                 ehdrlen = ETHER_HDR_LEN;
3538                 etype = eh->evl_encap_proto;
3539         }
3540
3541         /* Only handling IPv4 */
3542         if (etype != htons(ETHERTYPE_IP))
3543                 return;
3544
3545         ip = (struct ip *)(mp->m_data + ehdrlen);
3546         ip_hlen = ip->ip_hl << 2;
3547
3548         /* check if we're UDP or TCP */
3549         switch (ip->ip_p) {
3550         case IPPROTO_TCP:
3551                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3552                 /* src and dst are inverted */
3553                 common.port.dst ^= th->th_sport;
3554                 common.port.src ^= th->th_dport;
3555                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3556                 break;
3557         case IPPROTO_UDP:
3558                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3559                 /* src and dst are inverted */
3560                 common.port.dst ^= uh->uh_sport;
3561                 common.port.src ^= uh->uh_dport;
3562                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3563                 break;
3564         default:
3565                 return;
3566         }
3567
3568         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3569         if (mp->m_pkthdr.ether_vtag)
3570                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3571         else
3572                 common.flex_bytes ^= etype;
3573         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3574
3575         que = &adapter->queues[txr->me];
3576         /*
3577         ** This assumes the Rx queue and Tx
3578         ** queue are bound to the same CPU
3579         */
3580         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3581             input, common, que->msix);
3582 }
3583 #endif /* IXGBE_FDIR */
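/*
 * Illustrative sketch of why the ATR code above swaps the ports before
 * hashing: a signature filter installed from an outbound packet must
 * match the inbound half of the same flow, whose source and destination
 * ports are reversed.  Hypothetical helper for illustration only.
 */
#if 0
static void
example_atr_ports(u16 tx_sport, u16 tx_dport, u16 *match_src, u16 *match_dst)
{
        *match_dst = tx_sport;  /* inbound dport == outbound sport */
        *match_src = tx_dport;  /* inbound sport == outbound dport */
}
#endif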
3584
3585 /**********************************************************************
3586  *
3587  *  Examine each tx_buffer in the used queue. If the hardware is done
3588  *  processing the packet then free associated resources. The
3589  *  tx_buffer is put back on the free queue.
3590  *
3591  **********************************************************************/
3592 static void
3593 ixgbe_txeof(struct tx_ring *txr)
3594 {
3595         struct adapter          *adapter = txr->adapter;
3596         struct ifnet            *ifp = adapter->ifp;
3597         u32                     work, processed = 0;
3598         u16                     limit = txr->process_limit;
3599         struct ixgbe_tx_buf     *buf;
3600         union ixgbe_adv_tx_desc *txd;
3601
3602         mtx_assert(&txr->tx_mtx, MA_OWNED);
3603
3604 #ifdef DEV_NETMAP
3605         if (ifp->if_capenable & IFCAP_NETMAP) {
3606                 struct netmap_adapter *na = NA(ifp);
3607                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3608                 txd = txr->tx_base;
3609                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3610                     BUS_DMASYNC_POSTREAD);
3611                 /*
3612                  * In netmap mode, all the work is done in the context
3613                  * of the client thread. Interrupt handlers only wake up
3614                  * clients, which may be sleeping on individual rings
3615                  * or on a global resource for all rings.
3616                  * To implement tx interrupt mitigation, we wake up the client
3617                  * thread roughly every half ring, even if the NIC interrupts
3618                  * more frequently. This is implemented as follows:
3619                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3620                  *   the slot that should wake up the thread (nkr_num_slots
3621                  *   means the user thread should not be woken up);
3622                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3623                  *   or the slot has the DD bit set.
3624                  *
3625                  * When the driver has separate locks, we need to
3626                  * release and re-acquire txlock to avoid deadlocks.
3627                  * XXX see if we can find a better way.
3628                  */
3629                 if (!netmap_mitigate ||
3630                     (kring->nr_kflags < kring->nkr_num_slots &&
3631                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3632                         netmap_tx_irq(ifp, txr->me);
3633                 }
3634                 return;
3635         }
3636 #endif /* DEV_NETMAP */
3637
3638         if (txr->tx_avail == txr->num_desc) {
3639                 txr->queue_status = IXGBE_QUEUE_IDLE;
3640                 return;
3641         }
3642
3643         /* Get work starting point */
3644         work = txr->next_to_clean;
3645         buf = &txr->tx_buffers[work];
3646         txd = &txr->tx_base[work];
3647         work -= txr->num_desc; /* The distance to ring end */
3648         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3649             BUS_DMASYNC_POSTREAD);
3650
3651         do {
3652                 union ixgbe_adv_tx_desc *eop= buf->eop;
3653                 if (eop == NULL) /* No work */
3654                         break;
3655
3656                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3657                         break;  /* I/O not complete */
3658
3659                 if (buf->m_head) {
3660                         txr->bytes +=
3661                             buf->m_head->m_pkthdr.len;
3662                         bus_dmamap_sync(txr->txtag,
3663                             buf->map,
3664                             BUS_DMASYNC_POSTWRITE);
3665                         bus_dmamap_unload(txr->txtag,
3666                             buf->map);
3667                         m_freem(buf->m_head);
3668                         buf->m_head = NULL;
3669                         buf->map = NULL;
3670                 }
3671                 buf->eop = NULL;
3672                 ++txr->tx_avail;
3673
3674                 /* We clean the range if multi segment */
3675                 while (txd != eop) {
3676                         ++txd;
3677                         ++buf;
3678                         ++work;
3679                         /* wrap the ring? */
3680                         if (__predict_false(!work)) {
3681                                 work -= txr->num_desc;
3682                                 buf = txr->tx_buffers;
3683                                 txd = txr->tx_base;
3684                         }
3685                         if (buf->m_head) {
3686                                 txr->bytes +=
3687                                     buf->m_head->m_pkthdr.len;
3688                                 bus_dmamap_sync(txr->txtag,
3689                                     buf->map,
3690                                     BUS_DMASYNC_POSTWRITE);
3691                                 bus_dmamap_unload(txr->txtag,
3692                                     buf->map);
3693                                 m_freem(buf->m_head);
3694                                 buf->m_head = NULL;
3695                                 buf->map = NULL;
3696                         }
3697                         ++txr->tx_avail;
3698                         buf->eop = NULL;
3699
3700                 }
3701                 ++txr->packets;
3702                 ++processed;
3703                 ++ifp->if_opackets;
3704                 txr->watchdog_time = ticks;
3705
3706                 /* Try the next packet */
3707                 ++txd;
3708                 ++buf;
3709                 ++work;
3710                 /* reset with a wrap */
3711                 if (__predict_false(!work)) {
3712                         work -= txr->num_desc;
3713                         buf = txr->tx_buffers;
3714                         txd = txr->tx_base;
3715                 }
3716                 prefetch(txd);
3717         } while (__predict_true(--limit));
3718
3719         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3720             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3721
3722         work += txr->num_desc;
3723         txr->next_to_clean = work;
3724
3725         /*
3726         ** Watchdog calculation, we know there's
3727         ** work outstanding or the first return
3728         ** would have been taken, so none processed
3729         ** for too long indicates a hang.
3730         */
3731         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3732                 txr->queue_status = IXGBE_QUEUE_HUNG;
3733
3734         if (txr->tx_avail == txr->num_desc)
3735                 txr->queue_status = IXGBE_QUEUE_IDLE;
3736
3737         return;
3738 }
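/*
 * Note on the wrap arithmetic in ixgbe_txeof(): 'work' is biased by
 * -num_desc so that it reaches zero exactly at the ring boundary,
 * making the wrap test a cheap "!work".  E.g. with num_desc = 512 and
 * next_to_clean = 510: work = 510 - 512 = -2; after two increments
 * work == 0, the ring wraps, and work is re-biased by -512.  Adding
 * num_desc back at the end recovers the true next_to_clean index.
 */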
3739
3740 /*********************************************************************
3741  *
3742  *  Refresh mbuf buffers for RX descriptor rings
3743  *   - now keeps its own state so discards due to resource
3744  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3745  *     it simply returns, keeping its placeholder, so it can be
3746  *     called again later to retry.
3747  *
3748  **********************************************************************/
3749 static void
3750 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3751 {
3752         struct adapter          *adapter = rxr->adapter;
3753         bus_dma_segment_t       seg[1];
3754         struct ixgbe_rx_buf     *rxbuf;
3755         struct mbuf             *mp;
3756         int                     i, j, nsegs, error;
3757         bool                    refreshed = FALSE;
3758
3759         i = j = rxr->next_to_refresh;
3760         /* Control the loop with one beyond */
3761         if (++j == rxr->num_desc)
3762                 j = 0;
3763
3764         while (j != limit) {
3765                 rxbuf = &rxr->rx_buffers[i];
3766                 if (rxbuf->buf == NULL) {
3767                         mp = m_getjcl(M_NOWAIT, MT_DATA,
3768                             M_PKTHDR, rxr->mbuf_sz);
3769                         if (mp == NULL)
3770                                 goto update;
3771                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3772                                 m_adj(mp, ETHER_ALIGN);
3773                 } else
3774                         mp = rxbuf->buf;
3775
3776                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
3777
3778                 /* If we're dealing with an mbuf that was copied rather
3779                  * than replaced, there's no need to go through busdma.
3780                  */
3781                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
3782                         /* Get the memory mapping */
3783                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3784                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
3785                         if (error != 0) {
3786                                 printf("Refresh mbufs: payload dmamap load"
3787                                     " failure - %d\n", error);
3788                                 m_free(mp);
3789                                 rxbuf->buf = NULL;
3790                                 goto update;
3791                         }
3792                         rxbuf->buf = mp;
3793                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3794                             BUS_DMASYNC_PREREAD);
3795                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
3796                             htole64(seg[0].ds_addr);
3797                 } else {
3798                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
3799                         rxbuf->flags &= ~IXGBE_RX_COPY;
3800                 }
3801
3802                 refreshed = TRUE;
3803                 /* Next is precalculated */
3804                 i = j;
3805                 rxr->next_to_refresh = i;
3806                 if (++j == rxr->num_desc)
3807                         j = 0;
3808         }
3809 update:
3810         if (refreshed) /* Update hardware tail index */
3811                 IXGBE_WRITE_REG(&adapter->hw,
3812                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3813         return;
3814 }
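/*
 * Note: in ixgbe_refresh_mbufs() the index 'j' runs one slot ahead of
 * 'i', so the loop stops exactly at 'limit' without touching the slot
 * the hardware may still own; next_to_refresh only advances once slot
 * 'i' holds a valid buffer, so a failed allocation leaves the
 * placeholder in place and the routine can simply be called again.
 */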
3815
3816 /*********************************************************************
3817  *
3818  *  Allocate memory for rx_buffer structures. Since we use one
3819  *  rx_buffer per received packet, the maximum number of rx_buffer's
3820  *  that we'll need is equal to the number of receive descriptors
3821  *  that we've allocated.
3822  *
3823  **********************************************************************/
3824 static int
3825 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3826 {
3827         struct  adapter         *adapter = rxr->adapter;
3828         device_t                dev = adapter->dev;
3829         struct ixgbe_rx_buf     *rxbuf;
3830         int                     i, bsize, error;
3831
3832         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
3833         if (!(rxr->rx_buffers =
3834             (struct ixgbe_rx_buf *) malloc(bsize,
3835             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3836                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3837                 error = ENOMEM;
3838                 goto fail;
3839         }
3840
3841         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
3842                                    1, 0,        /* alignment, bounds */
3843                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3844                                    BUS_SPACE_MAXADDR,   /* highaddr */
3845                                    NULL, NULL,          /* filter, filterarg */
3846                                    MJUM16BYTES,         /* maxsize */
3847                                    1,                   /* nsegments */
3848                                    MJUM16BYTES,         /* maxsegsize */
3849                                    0,                   /* flags */
3850                                    NULL,                /* lockfunc */
3851                                    NULL,                /* lockfuncarg */
3852                                    &rxr->ptag))) {
3853                 device_printf(dev, "Unable to create RX DMA tag\n");
3854                 goto fail;
3855         }
3856
3857         for (i = 0; i < rxr->num_desc; i++) {
3858                 rxbuf = &rxr->rx_buffers[i];
3859                 error = bus_dmamap_create(rxr->ptag,
3860                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3861                 if (error) {
3862                         device_printf(dev, "Unable to create RX dma map\n");
3863                         goto fail;
3864                 }
3865         }
3866
3867         return (0);
3868
3869 fail:
3870         /* Frees all, but can handle partial completion */
3871         ixgbe_free_receive_structures(adapter);
3872         return (error);
3873 }
3874
3875 /*
3876 ** Used to detect a descriptor that has
3877 ** been merged by Hardware RSC.
3878 */
3879 static inline u32
3880 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3881 {
3882         return (le32toh(rx->wb.lower.lo_dword.data) &
3883             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3884 }
3885
3886 /*********************************************************************
3887  *
3888  *  Initialize Hardware RSC (LRO) feature on 82599
3889  *  for an RX ring; this is toggled by the LRO capability
3890  *  even though it is transparent to the stack.
3891  *
3892  *  NOTE: since this HW feature only works with IPv4 and
3893  *        our testing has shown soft LRO to be as effective,
3894  *        it is disabled by default.
3895  *
3896  **********************************************************************/
3897 static void
3898 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3899 {
3900         struct  adapter         *adapter = rxr->adapter;
3901         struct  ixgbe_hw        *hw = &adapter->hw;
3902         u32                     rscctrl, rdrxctl;
3903
3904         /* If turning LRO/RSC off we need to disable it */
3905         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
3906                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3907                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
3907                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3908                 return;
3909         }
3910
3911         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3912         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3913 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3914         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3915 #endif /* DEV_NETMAP */
3916         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3917         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3918         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3919
3920         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3921         rscctrl |= IXGBE_RSCCTL_RSCEN;
3922         /*
3923         ** Limit the total number of descriptors that
3924         ** can be combined, so it does not exceed 64K
3925         */
3926         if (rxr->mbuf_sz == MCLBYTES)
3927                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3928         else if (rxr->mbuf_sz == MJUMPAGESIZE)
3929                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3930         else if (rxr->mbuf_sz == MJUM9BYTES)
3931                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3932         else  /* Using 16K cluster */
3933                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3934
3935         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3936
3937         /* Enable TCP header recognition */
3938         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3939             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3940             IXGBE_PSRTYPE_TCPHDR));
3941
3942         /* Disable RSC for ACK packets */
3943         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3944             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3945
3946         rxr->hw_rsc = TRUE;
3947 }
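/*
 * Worked numbers for the MAXDESC choices above: the aggregate size,
 * cluster size times descriptor limit, must stay under the 64KB RSC
 * limit (MJUMPAGESIZE assumes 4KB pages):
 *      MCLBYTES (2KB)     * 16 = 32KB
 *      MJUMPAGESIZE (4KB) *  8 = 32KB
 *      MJUM9BYTES (9KB)   *  4 = 36KB
 *      16KB clusters      *  1 = 16KB
 */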
3948
3949
3950 static void
3951 ixgbe_free_receive_ring(struct rx_ring *rxr)
3952 {
3953         struct ixgbe_rx_buf       *rxbuf;
3954         int i;
3955
3956         for (i = 0; i < rxr->num_desc; i++) {
3957                 rxbuf = &rxr->rx_buffers[i];
3958                 if (rxbuf->buf != NULL) {
3959                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3960                             BUS_DMASYNC_POSTREAD);
3961                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3962                         rxbuf->buf->m_flags |= M_PKTHDR;
3963                         m_freem(rxbuf->buf);
3964                         rxbuf->buf = NULL;
3965                         rxbuf->flags = 0;
3966                 }
3967         }
3968 }
3969
3970
3971 /*********************************************************************
3972  *
3973  *  Initialize a receive ring and its buffers.
3974  *
3975  **********************************************************************/
3976 static int
3977 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3978 {
3979         struct  adapter         *adapter;
3980         struct ifnet            *ifp;
3981         device_t                dev;
3982         struct ixgbe_rx_buf     *rxbuf;
3983         bus_dma_segment_t       seg[1];
3984         struct lro_ctrl         *lro = &rxr->lro;
3985         int                     rsize, nsegs, error = 0;
3986 #ifdef DEV_NETMAP
3987         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3988         struct netmap_slot *slot;
3989 #endif /* DEV_NETMAP */
3990
3991         adapter = rxr->adapter;
3992         ifp = adapter->ifp;
3993         dev = adapter->dev;
3994
3995         /* Clear the ring contents */
3996         IXGBE_RX_LOCK(rxr);
3997 #ifdef DEV_NETMAP
3998         /* same as in ixgbe_setup_transmit_ring() */
3999         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4000 #endif /* DEV_NETMAP */
4001         rsize = roundup2(adapter->num_rx_desc *
4002             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4003         bzero((void *)rxr->rx_base, rsize);
4004         /* Cache the size */
4005         rxr->mbuf_sz = adapter->rx_mbuf_sz;
4006
4007         /* Free current RX buffer structs and their mbufs */
4008         ixgbe_free_receive_ring(rxr);
4009
4010         /* Now replenish the mbufs */
4011         for (int j = 0; j != rxr->num_desc; ++j) {
4012                 struct mbuf     *mp;
4013
4014                 rxbuf = &rxr->rx_buffers[j];
4015 #ifdef DEV_NETMAP
4016                 /*
4017                  * In netmap mode, fill the map and set the buffer
4018                  * address in the NIC ring, considering the offset
4019                  * between the netmap and NIC rings (see comment in
4020                  * ixgbe_setup_transmit_ring() ). No need to allocate
4021                  * an mbuf, so end the block with a continue;
4022                  */
4023                 if (slot) {
4024                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4025                         uint64_t paddr;
4026                         void *addr;
4027
4028                         addr = PNMB(na, slot + sj, &paddr);
4029                         netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4030                         /* Update descriptor and the cached value */
4031                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4032                         rxbuf->addr = htole64(paddr);
4033                         continue;
4034                 }
4035 #endif /* DEV_NETMAP */
4036                 rxbuf->flags = 0; 
4037                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
4038                     M_PKTHDR, adapter->rx_mbuf_sz);
4039                 if (rxbuf->buf == NULL) {
4040                         error = ENOBUFS;
4041                         goto fail;
4042                 }
4043                 mp = rxbuf->buf;
4044                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4045                 /* Get the memory mapping */
4046                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4047                     rxbuf->pmap, mp, seg,
4048                     &nsegs, BUS_DMA_NOWAIT);
4049                 if (error != 0)
4050                         goto fail;
4051                 bus_dmamap_sync(rxr->ptag,
4052                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4053                 /* Update the descriptor and the cached value */
4054                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
4055                 rxbuf->addr = htole64(seg[0].ds_addr);
4056         }
4057
4058
4059         /* Setup our descriptor indices */
4060         rxr->next_to_check = 0;
4061         rxr->next_to_refresh = 0;
4062         rxr->lro_enabled = FALSE;
4063         rxr->rx_copies = 0;
4064         rxr->rx_bytes = 0;
4065         rxr->vtag_strip = FALSE;
4066
4067         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4068             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4069
4070         /*
4071         ** Now set up the LRO interface:
4072         */
4073         if (ixgbe_rsc_enable)
4074                 ixgbe_setup_hw_rsc(rxr);
4075         else if (ifp->if_capenable & IFCAP_LRO) {
4076                 int err = tcp_lro_init(lro);
4077                 if (err) {
4078                         device_printf(dev, "LRO Initialization failed!\n");
4079                         goto fail;
4080                 }
4081                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4082                 rxr->lro_enabled = TRUE;
4083                 lro->ifp = adapter->ifp;
4084         }
4085
4086         IXGBE_RX_UNLOCK(rxr);
4087         return (0);
4088
4089 fail:
4090         ixgbe_free_receive_ring(rxr);
4091         IXGBE_RX_UNLOCK(rxr);
4092         return (error);
4093 }
4094
4095 /*********************************************************************
4096  *
4097  *  Initialize all receive rings.
4098  *
4099  **********************************************************************/
4100 static int
4101 ixgbe_setup_receive_structures(struct adapter *adapter)
4102 {
4103         struct rx_ring *rxr = adapter->rx_rings;
4104         int j;
4105
4106         for (j = 0; j < adapter->num_queues; j++, rxr++)
4107                 if (ixgbe_setup_receive_ring(rxr))
4108                         goto fail;
4109
4110         return (0);
4111 fail:
4112         /*
4113          * Free RX buffers allocated so far; we only handle the rings
4114          * that completed, since the failing case has already cleaned
4115          * up after itself.  'j' failed, so it is the terminus.
4116          */
4117         for (int i = 0; i < j; ++i) {
4118                 rxr = &adapter->rx_rings[i];
4119                 ixgbe_free_receive_ring(rxr);
4120         }
4121
4122         return (ENOBUFS);
4123 }
4124
4125 /*********************************************************************
4126  *
4127  *  Setup receive registers and features.
4128  *
4129  **********************************************************************/
4130 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4131
4132 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4133         
4134 static void
4135 ixgbe_initialize_receive_units(struct adapter *adapter)
4136 {
4137         struct  rx_ring *rxr = adapter->rx_rings;
4138         struct ixgbe_hw *hw = &adapter->hw;
4139         struct ifnet   *ifp = adapter->ifp;
4140         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4141         u32             reta, mrqc = 0, hlreg, random[10];
4142
4143
4144         /*
4145          * Make sure receives are disabled while
4146          * setting up the descriptor ring
4147          */
4148         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4149         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4150             rxctrl & ~IXGBE_RXCTRL_RXEN);
4151
4152         /* Enable broadcasts */
4153         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4154         fctrl |= IXGBE_FCTRL_BAM;
4155         fctrl |= IXGBE_FCTRL_DPF;
4156         fctrl |= IXGBE_FCTRL_PMCF;
4157         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4158
4159         /* Set for Jumbo Frames? */
4160         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4161         if (ifp->if_mtu > ETHERMTU)
4162                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4163         else
4164                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4165 #ifdef DEV_NETMAP
4166         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4167         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4168                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4169         else
4170                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4171 #endif /* DEV_NETMAP */
4172         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4173
4174         bufsz = (adapter->rx_mbuf_sz +
4175             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4176
4177         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4178                 u64 rdba = rxr->rxdma.dma_paddr;
4179
4180                 /* Setup the Base and Length of the Rx Descriptor Ring */
4181                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4182                                (rdba & 0x00000000ffffffffULL));
4183                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4184                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4185                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4186
4187                 /* Set up the SRRCTL register */
4188                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4189                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4190                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4191                 srrctl |= bufsz;
4192                 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4193
4194                 /*
4195                  * Set DROP_EN iff we have no flow control and >1 queue.
4196                  * Note that srrctl was cleared shortly before during reset,
4197                  * so we do not need to clear the bit, but do it just in case
4198                  * this code is moved elsewhere.
4199                  */
4200                 if (adapter->num_queues > 1 &&
4201                     adapter->hw.fc.requested_mode == ixgbe_fc_none) {
4202                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4203                 } else {
4204                         srrctl &= ~IXGBE_SRRCTL_DROP_EN;
4205                 }
4206
4207                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4208
4209                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4210                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4211                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4212
4213                 /* Set the processing limit */
4214                 rxr->process_limit = ixgbe_rx_process_limit;
4215         }
4216
4217         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4218                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4219                               IXGBE_PSRTYPE_UDPHDR |
4220                               IXGBE_PSRTYPE_IPV4HDR |
4221                               IXGBE_PSRTYPE_IPV6HDR;
4222                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4223         }
4224
4225         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4226
4227         /* Setup RSS */
4228         if (adapter->num_queues > 1) {
4229                 int i, j;
4230                 reta = 0;
4231
4232                 /* set up random bits */
4233                 arc4rand(&random, sizeof(random), 0);
4234
4235                 /* Set up the redirection table */
4236                 for (i = 0, j = 0; i < 128; i++, j++) {
4237                         if (j == adapter->num_queues) j = 0;
4238                         reta = (reta << 8) | (j * 0x11);
4239                         if ((i & 3) == 3)
4240                                 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4241                 }
4242
4243                 /* Now fill our hash function seeds */
4244                 for (int i = 0; i < 10; i++)
4245                         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4246
4247                 /* Perform hash on these packet types */
4248                 mrqc = IXGBE_MRQC_RSSEN
4249                      | IXGBE_MRQC_RSS_FIELD_IPV4
4250                      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4251                      | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4252                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4253                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4254                      | IXGBE_MRQC_RSS_FIELD_IPV6
4255                      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4256                      | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4257                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4258                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4259
4260                 /* RSS and RX IP payload checksum (IPPCSE) are mutually exclusive */
4261                 rxcsum |= IXGBE_RXCSUM_PCSD;
4262         }
4263
4264         if (ifp->if_capenable & IFCAP_RXCSUM)
4265                 rxcsum |= IXGBE_RXCSUM_PCSD;
4266
4267         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4268                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4269
4270         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4271
4272         return;
4273 }
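
/*
 * Editorial sketch (not part of the driver): how the RSS loop above
 * fills the 128-entry redirection table.  Each 32-bit RETA register
 * holds four 8-bit entries, the first landing in the most significant
 * byte, and the (j * 0x11) multiply mirrors the queue index into both
 * nibbles of each entry.  The helper name is hypothetical.
 */
static u32
ixgbe_reta_word_sketch(int reg, int num_queues)
{
	u32 reta = 0;

	/* Pack table entries 4*reg .. 4*reg+3 into one register. */
	for (int k = 0; k < 4; k++) {
		int q = (4 * reg + k) % num_queues;
		reta = (reta << 8) | (q * 0x11);
	}
	return (reta);	/* the value written to IXGBE_RETA(reg) */
}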
4274
4275 /*********************************************************************
4276  *
4277  *  Free all receive rings.
4278  *
4279  **********************************************************************/
4280 static void
4281 ixgbe_free_receive_structures(struct adapter *adapter)
4282 {
4283         struct rx_ring *rxr = adapter->rx_rings;
4284
4285         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4286
4287         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4288                 struct lro_ctrl         *lro = &rxr->lro;
4289                 ixgbe_free_receive_buffers(rxr);
4290                 /* Free LRO memory */
4291                 tcp_lro_free(lro);
4292                 /* Free the ring memory as well */
4293                 ixgbe_dma_free(adapter, &rxr->rxdma);
4294         }
4295
4296         free(adapter->rx_rings, M_DEVBUF);
4297 }
4298
4299
4300 /*********************************************************************
4301  *
4302  *  Free receive ring data structures
4303  *
4304  **********************************************************************/
4305 static void
4306 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4307 {
4308         struct adapter          *adapter = rxr->adapter;
4309         struct ixgbe_rx_buf     *rxbuf;
4310
4311         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4312
4313         /* Cleanup any existing buffers */
4314         if (rxr->rx_buffers != NULL) {
4315                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4316                         rxbuf = &rxr->rx_buffers[i];
4317                         if (rxbuf->buf != NULL) {
4318                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4319                                     BUS_DMASYNC_POSTREAD);
4320                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4321                                 rxbuf->buf->m_flags |= M_PKTHDR;
4322                                 m_freem(rxbuf->buf);
4323                         }
4324                         rxbuf->buf = NULL;
4325                         if (rxbuf->pmap != NULL) {
4326                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4327                                 rxbuf->pmap = NULL;
4328                         }
4329                 }
4330                 if (rxr->rx_buffers != NULL) {
4331                         free(rxr->rx_buffers, M_DEVBUF);
4332                         rxr->rx_buffers = NULL;
4333                 }
4334         }
4335
4336         if (rxr->ptag != NULL) {
4337                 bus_dma_tag_destroy(rxr->ptag);
4338                 rxr->ptag = NULL;
4339         }
4340
4341         return;
4342 }
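
/*
 * Editorial sketch: the per-buffer teardown order used above -- sync,
 * unload, free the mbuf, then destroy the map (the tag goes last,
 * once every map is gone).  Hypothetical helper, shown only to make
 * that ordering explicit.
 */
static void
ixgbe_dma_buf_teardown_sketch(bus_dma_tag_t tag, bus_dmamap_t map,
    struct mbuf *m)
{
	bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(tag, map);
	m_freem(m);
	bus_dmamap_destroy(tag, map);
}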
4343
4344 static __inline void
4345 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4346 {
4347                  
4348         /*
4349          * At the moment LRO is only for IP/TCP packets whose TCP checksum
4350          * was computed by hardware, with no VLAN tag left in the ethernet
4351          * header.  In case of IPv6 we do not yet support extension headers.
4352          */
4353         if (rxr->lro_enabled &&
4354             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4355             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4356             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4357             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4358             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4359             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4360             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4361             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4362                 /*
4363                  * Send to the stack if:
4364                  **  - LRO not enabled, or
4365                  **  - no LRO resources, or
4366                  **  - lro enqueue fails
4367                  */
4368                 if (rxr->lro.lro_cnt != 0)
4369                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4370                                 return;
4371         }
4372         IXGBE_RX_UNLOCK(rxr);
4373         (*ifp->if_input)(ifp, m);
4374         IXGBE_RX_LOCK(rxr);
4375 }
4376
4377 static __inline void
4378 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4379 {
4380         struct ixgbe_rx_buf     *rbuf;
4381
4382         rbuf = &rxr->rx_buffers[i];
4383
4384
4385         /*
4386         ** With advanced descriptors the writeback
4387         ** clobbers the buffer addrs, so it's easier
4388         ** to just free the existing mbufs and take
4389         ** the normal refresh path to get new buffers
4390         ** and mapping.
4391         */
4392
4393         if (rbuf->fmp != NULL) {/* Partial chain ? */
4394                 rbuf->fmp->m_flags |= M_PKTHDR;
4395                 m_freem(rbuf->fmp);
4396                 rbuf->fmp = NULL;
4397                 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
4398         } else if (rbuf->buf) {
4399                 m_free(rbuf->buf);
4400                 rbuf->buf = NULL;
4401         }
4402
4403         rbuf->flags = 0;
4404  
4405         return;
4406 }
4407
4408
4409 /*********************************************************************
4410  *
4411  *  This routine executes in interrupt context. It replenishes
4412  *  the mbufs in the descriptor ring and sends data which has been
4413  *  DMA'ed into host memory to the upper layer.
4414  *
4415  *  We loop at most 'count' times (the ring's process limit) and
4416  *  then return, leaving any remainder for the next pass.
4417  *
4418  *  Return TRUE for more work, FALSE for all clean.
4419  *********************************************************************/
4420 static bool
4421 ixgbe_rxeof(struct ix_queue *que)
4422 {
4423         struct adapter          *adapter = que->adapter;
4424         struct rx_ring          *rxr = que->rxr;
4425         struct ifnet            *ifp = adapter->ifp;
4426         struct lro_ctrl         *lro = &rxr->lro;
4427         struct lro_entry        *queued;
4428         int                     i, nextp, processed = 0;
4429         u32                     staterr = 0;
4430         u16                     count = rxr->process_limit;
4431         union ixgbe_adv_rx_desc *cur;
4432         struct ixgbe_rx_buf     *rbuf, *nbuf;
4433
4434         IXGBE_RX_LOCK(rxr);
4435
4436 #ifdef DEV_NETMAP
4437         /* Same as the txeof routine: wakeup clients on intr. */
4438         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4439                 IXGBE_RX_UNLOCK(rxr);
4440                 return (FALSE);
4441         }
4442 #endif /* DEV_NETMAP */
4443
4444         for (i = rxr->next_to_check; count != 0;) {
4445                 struct mbuf     *sendmp, *mp;
4446                 u32             rsc, ptype;
4447                 u16             len;
4448                 u16             vtag = 0;
4449                 bool            eop;
4450  
4451                 /* Sync the ring. */
4452                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4453                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4454
4455                 cur = &rxr->rx_base[i];
4456                 staterr = le32toh(cur->wb.upper.status_error);
4457
4458                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4459                         break;
4460                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4461                         break;
4462
4463                 count--;
4464                 sendmp = NULL;
4465                 nbuf = NULL;
4466                 rsc = 0;
4467                 cur->wb.upper.status_error = 0;
4468                 rbuf = &rxr->rx_buffers[i];
4469                 mp = rbuf->buf;
4470
4471                 len = le16toh(cur->wb.upper.length);
4472                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4473                     IXGBE_RXDADV_PKTTYPE_MASK;
4474                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4475
4476                 /* Make sure bad packets are discarded */
4477                 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
4478                         rxr->rx_discarded++;
4479                         ixgbe_rx_discard(rxr, i);
4480                         goto next_desc;
4481                 }
4482
4483                 /*
4484                 ** On 82599, which supports a hardware
4485                 ** LRO (called HW RSC), packets need
4486                 ** not be fragmented across sequential
4487                 ** descriptors; rather, the next descriptor
4488                 ** is indicated in bits of the descriptor.
4489                 ** This also means that we might process
4490                 ** more than one packet at a time, something
4491                 ** that has never been true before; it
4492                 ** required eliminating global chain pointers
4493                 ** in favor of what we are doing here.  -jfv
4494                 */
4495                 if (!eop) {
4496                         /*
4497                         ** Figure out the next descriptor
4498                         ** of this frame.
4499                         */
4500                         if (rxr->hw_rsc == TRUE) {
4501                                 rsc = ixgbe_rsc_count(cur);
4502                                 rxr->rsc_num += (rsc - 1);
4503                         }
4504                         if (rsc) { /* Get hardware index */
4505                                 nextp = ((staterr &
4506                                     IXGBE_RXDADV_NEXTP_MASK) >>
4507                                     IXGBE_RXDADV_NEXTP_SHIFT);
4508                         } else { /* Just sequential */
4509                                 nextp = i + 1;
4510                                 if (nextp == adapter->num_rx_desc)
4511                                         nextp = 0;
4512                         }
4513                         nbuf = &rxr->rx_buffers[nextp];
4514                         prefetch(nbuf);
4515                 }
4516                 /*
4517                 ** Rather than using the fmp/lmp global pointers
4518                 ** we now keep the head of a packet chain in the
4519                 ** buffer struct and pass this along from one
4520                 ** descriptor to the next, until we get EOP.
4521                 */
4522                 mp->m_len = len;
4523                 /*
4524                 ** See if there is a stored head
4525                 ** that determines what we are
4526                 */
4527                 sendmp = rbuf->fmp;
4528                 if (sendmp != NULL) {  /* secondary frag */
4529                         rbuf->buf = rbuf->fmp = NULL;
4530                         mp->m_flags &= ~M_PKTHDR;
4531                         sendmp->m_pkthdr.len += mp->m_len;
4532                 } else {
4533                         /*
4534                          * Optimize.  This might be a small packet,
4535                          * maybe just a TCP ACK.  Do a fast copy that
4536                          * is cache aligned into a new mbuf, and
4537                          * leave the old mbuf+cluster for re-use.
4538                          */
4539                         if (eop && len <= IXGBE_RX_COPY_LEN) {
4540                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4541                                 if (sendmp != NULL) {
4542                                         sendmp->m_data +=
4543                                             IXGBE_RX_COPY_ALIGN;
4544                                         ixgbe_bcopy(mp->m_data,
4545                                             sendmp->m_data, len);
4546                                         sendmp->m_len = len;
4547                                         rxr->rx_copies++;
4548                                         rbuf->flags |= IXGBE_RX_COPY;
4549                                 }
4550                         }
4551                         if (sendmp == NULL) {
4552                                 rbuf->buf = rbuf->fmp = NULL;
4553                                 sendmp = mp;
4554                         }
4555
4556                         /* first desc of a non packet-split chain */
4557                         sendmp->m_flags |= M_PKTHDR;
4558                         sendmp->m_pkthdr.len = mp->m_len;
4559                 }
4560                 ++processed;
4561
4562                 /* Pass the head pointer on */
4563                 if (eop == 0) {
4564                         nbuf->fmp = sendmp;
4565                         sendmp = NULL;
4566                         mp->m_next = nbuf->buf;
4567                 } else { /* Sending this frame */
4568                         sendmp->m_pkthdr.rcvif = ifp;
4569                         ifp->if_ipackets++;
4570                         rxr->rx_packets++;
4571                         /* capture data for AIM */
4572                         rxr->bytes += sendmp->m_pkthdr.len;
4573                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4574                         /* Process vlan info */
4575                         if ((rxr->vtag_strip) &&
4576                             (staterr & IXGBE_RXD_STAT_VP))
4577                                 vtag = le16toh(cur->wb.upper.vlan);
4578                         if (vtag) {
4579                                 sendmp->m_pkthdr.ether_vtag = vtag;
4580                                 sendmp->m_flags |= M_VLANTAG;
4581                         }
4582                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4583                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4584 #if __FreeBSD_version >= 800000
4585                         sendmp->m_pkthdr.flowid = que->msix;
4586                         M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
4587 #endif
4588                 }
4589 next_desc:
4590                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4591                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4592
4593                 /* Advance our pointers to the next descriptor. */
4594                 if (++i == rxr->num_desc)
4595                         i = 0;
4596
4597                 /* Now send to the stack or do LRO */
4598                 if (sendmp != NULL) {
4599                         rxr->next_to_check = i;
4600                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4601                         i = rxr->next_to_check;
4602                 }
4603
4604                 /* Every 8 descriptors we go to refresh mbufs */
4605                 if (processed == 8) {
4606                         ixgbe_refresh_mbufs(rxr, i);
4607                         processed = 0;
4608                 }
4609         }
4610
4611         /* Refresh any remaining buf structs */
4612         if (ixgbe_rx_unrefreshed(rxr))
4613                 ixgbe_refresh_mbufs(rxr, i);
4614
4615         rxr->next_to_check = i;
4616
4617         /*
4618          * Flush any outstanding LRO work
4619          */
4620         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4621                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4622                 tcp_lro_flush(lro, queued);
4623         }
4624
4625         IXGBE_RX_UNLOCK(rxr);
4626
4627         /*
4628         ** Still have cleaning to do?
4629         */
4630         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
4631                 return (TRUE);
4632         else
4633                 return (FALSE);
4634 }
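
/*
 * Editorial sketch: ring indices in ixgbe_rxeof() wrap with a
 * compare-and-reset rather than a modulo; the "++i == rxr->num_desc"
 * and "nextp = i + 1" code above is equivalent to this hypothetical
 * helper:
 */
static inline int
ixgbe_next_rx_index_sketch(int i, int num_desc)
{
	return ((i + 1 == num_desc) ? 0 : i + 1);
}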
4635
4636
4637 /*********************************************************************
4638  *
4639  *  Verify that the hardware indicated that the checksum is valid.
4640  *  Inform the stack about the checksum status so that the stack
4641  *  doesn't spend time verifying it.
4642  *
4643  *********************************************************************/
4644 static void
4645 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4646 {
4647         u16     status = (u16) staterr;
4648         u8      errors = (u8) (staterr >> 24);
4649         bool    sctp = FALSE;
4650
4651         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4652             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4653                 sctp = TRUE;
4654
4655         if (status & IXGBE_RXD_STAT_IPCS) {
4656                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4657                         /* IP Checksum Good */
4658                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4659                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4660
4661                 } else
4662                         mp->m_pkthdr.csum_flags = 0;
4663         }
4664         if (status & IXGBE_RXD_STAT_L4CS) {
4665                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4666 #if __FreeBSD_version >= 800000
4667                 if (sctp)
4668                         type = CSUM_SCTP_VALID;
4669 #endif
4670                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4671                         mp->m_pkthdr.csum_flags |= type;
4672                         if (!sctp)
4673                                 mp->m_pkthdr.csum_data = htons(0xffff);
4674                 } 
4675         }
4676         return;
4677 }
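
/*
 * Editorial sketch: how a consumer of the mbuf typically tests the
 * flags set above -- the same test ixgbe_rx_input() applies before
 * attempting LRO.  Hypothetical helper for illustration.
 */
static bool
ixgbe_l4_csum_verified_sketch(struct mbuf *mp)
{
	return ((mp->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR));
}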
4678
4679
4680 /*
4681 ** This routine is run via a vlan config EVENT;
4682 ** it enables us to use the HW Filter table since
4683 ** we can get the vlan id. This just creates the
4684 ** entry in the soft version of the VFTA; init will
4685 ** repopulate the real table.
4686 */
4687 static void
4688 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4689 {
4690         struct adapter  *adapter = ifp->if_softc;
4691         u16             index, bit;
4692
4693         if (ifp->if_softc !=  arg)   /* Not our event */
4694                 return;
4695
4696         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4697                 return;
4698
4699         IXGBE_CORE_LOCK(adapter);
4700         index = (vtag >> 5) & 0x7F;
4701         bit = vtag & 0x1F;
4702         adapter->shadow_vfta[index] |= (1 << bit);
4703         ++adapter->num_vlans;
4704         ixgbe_setup_vlan_hw_support(adapter);
4705         IXGBE_CORE_UNLOCK(adapter);
4706 }
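
/*
 * Editorial sketch: the shadow VFTA is a 4096-bit bitmap kept as 128
 * 32-bit words, so the index/bit math above maps, e.g., VLAN id 100
 * to word 3, bit 4.  A hypothetical query helper:
 */
static bool
ixgbe_shadow_vfta_test_sketch(struct adapter *adapter, u16 vtag)
{
	u16 index = (vtag >> 5) & 0x7F;
	u16 bit = vtag & 0x1F;

	return ((adapter->shadow_vfta[index] & (1 << bit)) != 0);
}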
4707
4708 /*
4709 ** This routine is run via a vlan
4710 ** unconfig EVENT; it removes our entry
4711 ** from the soft VFTA.
4712 */
4713 static void
4714 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4715 {
4716         struct adapter  *adapter = ifp->if_softc;
4717         u16             index, bit;
4718
4719         if (ifp->if_softc !=  arg)
4720                 return;
4721
4722         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4723                 return;
4724
4725         IXGBE_CORE_LOCK(adapter);
4726         index = (vtag >> 5) & 0x7F;
4727         bit = vtag & 0x1F;
4728         adapter->shadow_vfta[index] &= ~(1 << bit);
4729         --adapter->num_vlans;
4730         /* Re-init to load the changes */
4731         ixgbe_setup_vlan_hw_support(adapter);
4732         IXGBE_CORE_UNLOCK(adapter);
4733 }
4734
4735 static void
4736 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4737 {
4738         struct ifnet    *ifp = adapter->ifp;
4739         struct ixgbe_hw *hw = &adapter->hw;
4740         struct rx_ring  *rxr;
4741         u32             ctrl;
4742
4743
4744         /*
4745         ** We get here through init_locked, meaning
4746         ** a soft reset; this has already cleared
4747         ** the VFTA and other state, so if no
4748         ** vlans have been registered, do nothing.
4749         */
4750         if (adapter->num_vlans == 0)
4751                 return;
4752
4753         /* Setup the queues for vlans */
4754         for (int i = 0; i < adapter->num_queues; i++) {
4755                 rxr = &adapter->rx_rings[i];
4756                 /* On 82599 the VLAN enable is per-queue in RXDCTL */
4757                 if (hw->mac.type != ixgbe_mac_82598EB) {
4758                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4759                         ctrl |= IXGBE_RXDCTL_VME;
4760                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4761                 }
4762                 rxr->vtag_strip = TRUE;
4763         }
4764
4765         if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
4766                 return;
4767         /*
4768         ** A soft reset zeroes out the VFTA, so
4769         ** we need to repopulate it now.
4770         */
4771         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4772                 if (adapter->shadow_vfta[i] != 0)
4773                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4774                             adapter->shadow_vfta[i]);
4775
4776         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4777         /* Enable the VLAN Filter Table */
4778         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4779                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4780                 ctrl |= IXGBE_VLNCTRL_VFE;
4781         }
4782         if (hw->mac.type == ixgbe_mac_82598EB)
4783                 ctrl |= IXGBE_VLNCTRL_VME;
4784         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4785 }
4786
4787 static void
4788 ixgbe_enable_intr(struct adapter *adapter)
4789 {
4790         struct ixgbe_hw *hw = &adapter->hw;
4791         struct ix_queue *que = adapter->queues;
4792         u32             mask, fwsm;
4793
4794         mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4795         /* Enable Fan Failure detection */
4796         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4797                     mask |= IXGBE_EIMS_GPI_SDP1;
4798
4799         switch (adapter->hw.mac.type) {
4800                 case ixgbe_mac_82599EB:
4801                         mask |= IXGBE_EIMS_ECC;
4802                         mask |= IXGBE_EIMS_GPI_SDP0;
4803                         mask |= IXGBE_EIMS_GPI_SDP1;
4804                         mask |= IXGBE_EIMS_GPI_SDP2;
4805 #ifdef IXGBE_FDIR
4806                         mask |= IXGBE_EIMS_FLOW_DIR;
4807 #endif
4808                         break;
4809                 case ixgbe_mac_X540:
4810                         mask |= IXGBE_EIMS_ECC;
4811                         /* Detect if Thermal Sensor is enabled */
4812                         fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
4813                         if (fwsm & IXGBE_FWSM_TS_ENABLED)
4814                                 mask |= IXGBE_EIMS_TS;
4815 #ifdef IXGBE_FDIR
4816                         mask |= IXGBE_EIMS_FLOW_DIR;
4817 #endif
4818                 /* falls through */
4819                 default:
4820                         break;
4821         }
4822
4823         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4824
4825         /* With RSS we use auto clear */
4826         if (adapter->msix_mem) {
4827                 mask = IXGBE_EIMS_ENABLE_MASK;
4828                 /* Don't autoclear Link */
4829                 mask &= ~IXGBE_EIMS_OTHER;
4830                 mask &= ~IXGBE_EIMS_LSC;
4831                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4832         }
4833
4834         /*
4835         ** Now enable all queues; this is done separately to
4836         ** allow handling of the extended (beyond 32) MSIX
4837         ** vectors that can be used by 82599.
4838         */
4839         for (int i = 0; i < adapter->num_queues; i++, que++)
4840                 ixgbe_enable_queue(adapter, que->msix);
4841
4842         IXGBE_WRITE_FLUSH(hw);
4843
4844         return;
4845 }
4846
4847 static void
4848 ixgbe_disable_intr(struct adapter *adapter)
4849 {
4850         if (adapter->msix_mem)
4851                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4852         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4853                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4854         } else {
4855                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4856                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4857                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4858         }
4859         IXGBE_WRITE_FLUSH(&adapter->hw);
4860         return;
4861 }
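
/*
 * Editorial sketch: on newer MACs the queue masks span two EIMC_EX
 * registers of 32 vectors each, which is why the code above clears
 * both.  Masking a single vector would look like this hypothetical
 * helper (assuming the same vector-to-register split):
 */
static void
ixgbe_mask_vector_sketch(struct ixgbe_hw *hw, int vector)
{
	IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(vector >> 5),
	    (u32)1 << (vector & 0x1F));
}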
4862
4863 u16
4864 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4865 {
4866         u16 value;
4867
4868         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4869             reg, 2);
4870
4871         return (value);
4872 }
4873
4874 void
4875 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4876 {
4877         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4878             reg, value, 2);
4879
4880         return;
4881 }
4882
4883 /*
4884 ** Get the width and transaction speed of
4885 ** the slot this adapter is plugged into.
4886 */
4887 static void
4888 ixgbe_get_slot_info(struct ixgbe_hw *hw)
4889 {
4890         device_t                dev = ((struct ixgbe_osdep *)hw->back)->dev;
4891         struct ixgbe_mac_info   *mac = &hw->mac;
4892         u16                     link;
4893         u32                     offset;
4894
4895         /* For most devices simply call the shared code routine */
4896         if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
4897                 ixgbe_get_bus_info(hw);
4898                 goto display;
4899         }
4900
4901         /*
4902         ** For the Quad port adapter we need to parse back
4903         ** up the PCI tree to find the speed of the expansion
4904         ** slot into which this adapter is plugged. A bit more work.
4905         */
4906         dev = device_get_parent(device_get_parent(dev));
4907 #ifdef IXGBE_DEBUG
4908         device_printf(dev, "parent pcib = %x,%x,%x\n",
4909             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4910 #endif
4911         dev = device_get_parent(device_get_parent(dev));
4912 #ifdef IXGBE_DEBUG
4913         device_printf(dev, "slot pcib = %x,%x,%x\n",
4914             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4915 #endif
4916         /* Now get the PCI Express Capabilities offset */
4917         pci_find_cap(dev, PCIY_EXPRESS, &offset);
4918         /* ...and read the Link Status Register */
4919         link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
4920         switch (link & IXGBE_PCI_LINK_WIDTH) {
4921         case IXGBE_PCI_LINK_WIDTH_1:
4922                 hw->bus.width = ixgbe_bus_width_pcie_x1;
4923                 break;
4924         case IXGBE_PCI_LINK_WIDTH_2:
4925                 hw->bus.width = ixgbe_bus_width_pcie_x2;
4926                 break;
4927         case IXGBE_PCI_LINK_WIDTH_4:
4928                 hw->bus.width = ixgbe_bus_width_pcie_x4;
4929                 break;
4930         case IXGBE_PCI_LINK_WIDTH_8:
4931                 hw->bus.width = ixgbe_bus_width_pcie_x8;
4932                 break;
4933         default:
4934                 hw->bus.width = ixgbe_bus_width_unknown;
4935                 break;
4936         }
4937
4938         switch (link & IXGBE_PCI_LINK_SPEED) {
4939         case IXGBE_PCI_LINK_SPEED_2500:
4940                 hw->bus.speed = ixgbe_bus_speed_2500;
4941                 break;
4942         case IXGBE_PCI_LINK_SPEED_5000:
4943                 hw->bus.speed = ixgbe_bus_speed_5000;
4944                 break;
4945         case IXGBE_PCI_LINK_SPEED_8000:
4946                 hw->bus.speed = ixgbe_bus_speed_8000;
4947                 break;
4948         default:
4949                 hw->bus.speed = ixgbe_bus_speed_unknown;
4950                 break;
4951         }
4952
4953         mac->ops.set_lan_id(hw);
4954
4955 display:
4956         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
4957             ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
4958             (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
4959             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
4960             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
4961             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
4962             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
4963             ("Unknown"));
4964
4965         if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4966             ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
4967             (hw->bus.speed == ixgbe_bus_speed_2500))) {
4968                 device_printf(dev, "PCI-Express bandwidth available"
4969                     " for this card\n     is not sufficient for"
4970                     " optimal performance.\n");
4971                 device_printf(dev, "For optimal performance a x8 "
4972                     "PCIE, or x4 PCIE Gen2 slot is required.\n");
4973         }
4974         if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4975             ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
4976             (hw->bus.speed < ixgbe_bus_speed_8000))) {
4977                 device_printf(dev, "PCI-Express bandwidth available"
4978                     " for this card\n     is not sufficient for"
4979                     " optimal performance.\n");
4980                 device_printf(dev, "For optimal performance a x8 "
4981                     "PCIE Gen3 slot is required.\n");
4982         }
4983
4984         return;
4985 }
4986
4987
4988 /*
4989 ** Setup the correct IVAR register for a particular MSIX interrupt
4990 **   (yes this is all very magic and confusing :)
4991 **  - entry is the register array entry
4992 **  - vector is the MSIX vector for this queue
4993 **  - type is RX/TX/MISC
4994 */
4995 static void
4996 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4997 {
4998         struct ixgbe_hw *hw = &adapter->hw;
4999         u32 ivar, index;
5000
5001         vector |= IXGBE_IVAR_ALLOC_VAL;
5002
5003         switch (hw->mac.type) {
5004
5005         case ixgbe_mac_82598EB:
5006                 if (type == -1)
5007                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
5008                 else
5009                         entry += (type * 64);
5010                 index = (entry >> 2) & 0x1F;
5011                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5012                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5013                 ivar |= (vector << (8 * (entry & 0x3)));
5014                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5015                 break;
5016
5017         case ixgbe_mac_82599EB:
5018         case ixgbe_mac_X540:
5019                 if (type == -1) { /* MISC IVAR */
5020                         index = (entry & 1) * 8;
5021                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5022                         ivar &= ~(0xFF << index);
5023                         ivar |= (vector << index);
5024                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5025                 } else {        /* RX/TX IVARS */
5026                         index = (16 * (entry & 1)) + (8 * type);
5027                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5028                         ivar &= ~(0xFF << index);
5029                         ivar |= (vector << index);
5030                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5031                 }
                      break;
5032
5033         default:
5034                 break;
5035         }
5036 }
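
/*
 * Editorial sketch of the 82599/X540 IVAR layout handled above: each
 * 32-bit IVAR register covers two queue entries, with four 8-bit
 * vector slots (RX and TX for each), so entry 'e' of type t (0 = RX,
 * 1 = TX) lives in IVAR(e >> 1) at this bit offset (hypothetical
 * helper):
 */
static u32
ixgbe_ivar_bit_offset_sketch(u8 entry, s8 type)
{
	return ((16 * (entry & 1)) + (8 * type));
}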
5037
5038 static void
5039 ixgbe_configure_ivars(struct adapter *adapter)
5040 {
5041         struct  ix_queue *que = adapter->queues;
5042         u32 newitr;
5043
5044         if (ixgbe_max_interrupt_rate > 0)
5045                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5046         else
5047                 newitr = 0;
5048
5049         for (int i = 0; i < adapter->num_queues; i++, que++) {
5050                 /* First the RX queue entry */
5051                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5052                 /* ... and the TX */
5053                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5054                 /* Set an Initial EITR value */
5055                 IXGBE_WRITE_REG(&adapter->hw,
5056                     IXGBE_EITR(que->msix), newitr);
5057         }
5058
5059         /* For the Link interrupt */
5060         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5061 }
5062
5063 /*
5064 ** ixgbe_sfp_probe - called in the local timer to
5065 ** determine whether a port has had optics inserted.
5066 */  
5067 static bool ixgbe_sfp_probe(struct adapter *adapter)
5068 {
5069         struct ixgbe_hw *hw = &adapter->hw;
5070         device_t        dev = adapter->dev;
5071         bool            result = FALSE;
5072
5073         if ((hw->phy.type == ixgbe_phy_nl) &&
5074             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5075                 s32 ret = hw->phy.ops.identify_sfp(hw);
5076                 if (ret)
5077                         goto out;
5078                 ret = hw->phy.ops.reset(hw);
5079                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5080                         device_printf(dev,"Unsupported SFP+ module detected!");
5081                         printf(" Reload driver with supported module.\n");
5082                         adapter->sfp_probe = FALSE;
5083                         goto out;
5084                 } else
5085                         device_printf(dev,"SFP+ module detected!\n");
5086                 /* We now have supported optics */
5087                 adapter->sfp_probe = FALSE;
5088                 /* Set the optics type so system reports correctly */
5089                 ixgbe_setup_optics(adapter);
5090                 result = TRUE;
5091         }
5092 out:
5093         return (result);
5094 }
5095
5096 /*
5097 ** Tasklet handler for MSIX Link interrupts
5098 **  - done outside the interrupt context since it might sleep
5099 */
5100 static void
5101 ixgbe_handle_link(void *context, int pending)
5102 {
5103         struct adapter  *adapter = context;
5104
5105         ixgbe_check_link(&adapter->hw,
5106             &adapter->link_speed, &adapter->link_up, 0);
5107         ixgbe_update_link_status(adapter);
5108 }
5109
5110 /*
5111 ** Tasklet for handling SFP module interrupts
5112 */
5113 static void
5114 ixgbe_handle_mod(void *context, int pending)
5115 {
5116         struct adapter  *adapter = context;
5117         struct ixgbe_hw *hw = &adapter->hw;
5118         device_t        dev = adapter->dev;
5119         u32 err;
5120
5121         err = hw->phy.ops.identify_sfp(hw);
5122         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5123                 device_printf(dev,
5124                     "Unsupported SFP+ module type was detected.\n");
5125                 return;
5126         }
5127         err = hw->mac.ops.setup_sfp(hw);
5128         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5129                 device_printf(dev,
5130                     "Setup failure - unsupported SFP+ module type.\n");
5131                 return;
5132         }
5133         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5134         return;
5135 }
5136
5137
5138 /*
5139 ** Tasklet for handling MSF (multispeed fiber) interrupts
5140 */
5141 static void
5142 ixgbe_handle_msf(void *context, int pending)
5143 {
5144         struct adapter  *adapter = context;
5145         struct ixgbe_hw *hw = &adapter->hw;
5146         u32 autoneg;
5147         bool negotiate;
5148
5149         autoneg = hw->phy.autoneg_advertised;
5150         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5151                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5152         if (hw->mac.ops.setup_link)
5153                 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5154         return;
5155 }
5156
5157 #ifdef IXGBE_FDIR
5158 /*
5159 ** Tasklet for reinitializing the Flow Director filter table
5160 */
5161 static void
5162 ixgbe_reinit_fdir(void *context, int pending)
5163 {
5164         struct adapter  *adapter = context;
5165         struct ifnet   *ifp = adapter->ifp;
5166
5167         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5168                 return;
5169         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5170         adapter->fdir_reinit = 0;
5171         /* re-enable flow director interrupts */
5172         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5173         /* Restart the interface */
5174         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5175         return;
5176 }
5177 #endif
5178
5179 /**********************************************************************
5180  *
5181  *  Update the board statistics counters.
5182  *
5183  **********************************************************************/
5184 static void
5185 ixgbe_update_stats_counters(struct adapter *adapter)
5186 {
5187         struct ifnet   *ifp = adapter->ifp;
5188         struct ixgbe_hw *hw = &adapter->hw;
5189         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5190         u64  total_missed_rx = 0;
5191
5192         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5193         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5194         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5195         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5196
5197         /*
5198         ** Note: these are for the 8 possible traffic classes,
5199         **       which the current implementation does not use,
5200         **       therefore only class 0 should read real data.
5201         */
5202         for (int i = 0; i < 8; i++) {
5203                 u32 mp;
5204                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5205                 /* missed_rx tallies misses for the gprc workaround */
5206                 missed_rx += mp;
5207                 /* running total per traffic class */
5208                 adapter->stats.mpc[i] += mp;
5209                 /* Running comprehensive total for stats display */
5210                 total_missed_rx += adapter->stats.mpc[i];
5211                 if (hw->mac.type == ixgbe_mac_82598EB) {
5212                         adapter->stats.rnbc[i] +=
5213                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5214                         adapter->stats.qbtc[i] +=
5215                             IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5216                         adapter->stats.qbrc[i] +=
5217                             IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5218                         adapter->stats.pxonrxc[i] +=
5219                             IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5220                 } else
5221                         adapter->stats.pxonrxc[i] +=
5222                             IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5223                 adapter->stats.pxontxc[i] +=
5224                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5225                 adapter->stats.pxofftxc[i] +=
5226                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5227                 adapter->stats.pxoffrxc[i] +=
5228                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5229                 adapter->stats.pxon2offc[i] +=
5230                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5231         }
5232         for (int i = 0; i < 16; i++) {
5233                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5234                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5235                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5236         }
5237         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5238         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5239         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5240
5241         /* Hardware workaround: gprc also counts missed packets */
5242         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5243         adapter->stats.gprc -= missed_rx;
5244
5245         if (hw->mac.type != ixgbe_mac_82598EB) {
5246                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5247                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5248                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5249                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5250                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5251                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5252                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5253                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5254         } else {
5255                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5256                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5257                 /* 82598 only has a counter in the high register */
5258                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5259                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5260                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5261         }
5262
5263         /*
5264          * Workaround: mprc hardware is incorrectly counting
5265          * broadcasts, so for now we subtract those.
5266          */
5267         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5268         adapter->stats.bprc += bprc;
5269         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5270         if (hw->mac.type == ixgbe_mac_82598EB)
5271                 adapter->stats.mprc -= bprc;
5272
5273         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5274         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5275         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5276         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5277         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5278         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5279
5280         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5281         adapter->stats.lxontxc += lxon;
5282         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5283         adapter->stats.lxofftxc += lxoff;
5284         total = lxon + lxoff;
5285
5286         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5287         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5288         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5289         adapter->stats.gptc -= total;
5290         adapter->stats.mptc -= total;
5291         adapter->stats.ptc64 -= total;
5292         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5293
5294         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5295         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5296         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5297         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5298         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5299         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5300         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5301         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5302         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5303         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5304         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5305         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5306         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5307         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5308         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5309         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5310         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5311         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5312         /* Only read FCOE stats on 82599 and later */
5313         if (hw->mac.type != ixgbe_mac_82598EB) {
5314                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5315                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5316                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5317                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5318                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5319         }
5320
5321         /* Fill out the OS statistics structure */
5322         ifp->if_ipackets = adapter->stats.gprc;
5323         ifp->if_opackets = adapter->stats.gptc;
5324         ifp->if_ibytes = adapter->stats.gorc;
5325         ifp->if_obytes = adapter->stats.gotc;
5326         ifp->if_imcasts = adapter->stats.mprc;
5327         ifp->if_omcasts = adapter->stats.mptc;
5328         ifp->if_collisions = 0;
5329
5330         /* Rx Errors */
5331         ifp->if_iqdrops = total_missed_rx;
5332         ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
5333 }
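
/*
 * Editorial sketch: the GORC/GOTC/TOR byte counters read above are
 * split across low/high register pairs and combined as
 * low + (high << 32); a hypothetical helper doing the same:
 */
static u64
ixgbe_read_stat_pair_sketch(struct ixgbe_hw *hw, u32 lo_reg, u32 hi_reg)
{
	return ((u64)IXGBE_READ_REG(hw, lo_reg) +
	    ((u64)IXGBE_READ_REG(hw, hi_reg) << 32));
}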
5334
5335 /** ixgbe_sysctl_tdh_handler - Handler function
5336  *  Retrieves the TDH value from the hardware
5337  */
5338 static int 
5339 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5340 {
5341         int error;
5342
5343         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5344         if (!txr) return 0;
5345
5346         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5347         error = sysctl_handle_int(oidp, &val, 0, req);
5348         if (error || !req->newptr)
5349                 return error;
5350         return 0;
5351 }
5352
5353 /** ixgbe_sysctl_tdt_handler - Handler function
5354  *  Retrieves the TDT value from the hardware
5355  */
5356 static int 
5357 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5358 {
5359         int error;
5360
5361         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5362         if (!txr) return 0;
5363
5364         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5365         error = sysctl_handle_int(oidp, &val, 0, req);
5366         if (error || !req->newptr)
5367                 return error;
5368         return 0;
5369 }
5370
5371 /** ixgbe_sysctl_rdh_handler - Handler function
5372  *  Retrieves the RDH value from the hardware
5373  */
5374 static int 
5375 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5376 {
5377         int error;
5378
5379         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5380         if (!rxr) return 0;
5381
5382         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5383         error = sysctl_handle_int(oidp, &val, 0, req);
5384         if (error || !req->newptr)
5385                 return error;
5386         return 0;
5387 }
5388
5389 /** ixgbe_sysctl_rdt_handler - Handler function
5390  *  Retrieves the RDT value from the hardware
5391  */
5392 static int 
5393 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5394 {
5395         int error;
5396
5397         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5398         if (!rxr) return 0;
5399
5400         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5401         error = sysctl_handle_int(oidp, &val, 0, req);
5402         if (error || !req->newptr)
5403                 return error;
5404         return 0;
5405 }
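
/*
 * The four handlers above surface live ring state read-only; once
 * registered by ixgbe_add_hw_stats() below they can be inspected from
 * userland, e.g. (assuming the usual "ix" device name):
 *
 *	sysctl dev.ix.0.queue0.txd_head dev.ix.0.queue0.rxd_tail
 */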
5406
5407 static int
5408 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5409 {
5410         int error;
5411         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5412         unsigned int reg, usec, rate;
5413
5414         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5415         usec = ((reg & 0x0FF8) >> 3);
5416         if (usec > 0)
5417                 rate = 500000 / usec;
5418         else
5419                 rate = 0;
5420         error = sysctl_handle_int(oidp, &rate, 0, req);
5421         if (error || !req->newptr)
5422                 return error;
5423         reg &= ~0xfff; /* default, no limitation */
5424         ixgbe_max_interrupt_rate = 0;
5425         if (rate > 0 && rate < 500000) {
5426                 if (rate < 1000)
5427                         rate = 1000;
5428                 ixgbe_max_interrupt_rate = rate;
5429                 reg |= ((4000000/rate) & 0xff8 );
5430         }
5431         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5432         return 0;
5433 }
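
/*
 * Editorial sketch of the EITR arithmetic used above and in
 * ixgbe_configure_ivars(): bits 11:3 of EITR hold the minimum
 * inter-interrupt interval, effectively in 2 usec units given the
 * 4000000/500000 constants used here, so a requested rate of R
 * interrupts/sec becomes (4000000 / R) with the low three bits
 * masked off, and reading it back is 500000 / (reg >> 3).
 */
static u32
ixgbe_rate_to_eitr_sketch(u32 rate)
{
	if (rate == 0)
		return (0);	/* 0 means no throttling */
	return ((4000000 / rate) & 0x0FF8);
}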
5434
5435 /*
5436  * Add sysctl variables, one per statistic, to the system.
5437  */
5438 static void
5439 ixgbe_add_hw_stats(struct adapter *adapter)
5440 {
5441
5442         device_t dev = adapter->dev;
5443
5444         struct tx_ring *txr = adapter->tx_rings;
5445         struct rx_ring *rxr = adapter->rx_rings;
5446
5447         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5448         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5449         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5450         struct ixgbe_hw_stats *stats = &adapter->stats;
5451
5452         struct sysctl_oid *stat_node, *queue_node;
5453         struct sysctl_oid_list *stat_list, *queue_list;
5454
5455 #define QUEUE_NAME_LEN 32
5456         char namebuf[QUEUE_NAME_LEN];
5457
5458         /* Driver Statistics */
5459         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5460                         CTLFLAG_RD, &adapter->dropped_pkts,
5461                         "Driver dropped packets");
5462         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5463                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5464                         "m_defrag() failed");
5465         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5466                         CTLFLAG_RD, &adapter->watchdog_events,
5467                         "Watchdog timeouts");
5468         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5469                         CTLFLAG_RD, &adapter->link_irq,
5470                         "Link MSIX IRQ Handled");
5471
5472         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5473                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5474                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5475                                             CTLFLAG_RD, NULL, "Queue Name");
5476                 queue_list = SYSCTL_CHILDREN(queue_node);
5477
5478                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5479                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5480                                 sizeof(&adapter->queues[i]),
5481                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5482                                 "Interrupt Rate");
5483                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5484                                 CTLFLAG_RD, &(adapter->queues[i].irqs),
5485                                 "irqs on this queue");
5486                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5487                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5488                                 ixgbe_sysctl_tdh_handler, "IU",
5489                                 "Transmit Descriptor Head");
5490                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5491                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5492                                 ixgbe_sysctl_tdt_handler, "IU",
5493                                 "Transmit Descriptor Tail");
5494                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
5495                                 CTLFLAG_RD, &txr->tso_tx,
5496                                 "TSO");
5497                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
5498                                 CTLFLAG_RD, &txr->no_tx_dma_setup,
5499                                 "Driver tx dma failure in xmit");
5500                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5501                                 CTLFLAG_RD, &txr->no_desc_avail,
5502                                 "Queue No Descriptor Available");
5503                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5504                                 CTLFLAG_RD, &txr->total_packets,
5505                                 "Queue Packets Transmitted");
5506         }
5507
5508         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5509                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5510                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5511                                             CTLFLAG_RD, NULL, "Queue Name");
5512                 queue_list = SYSCTL_CHILDREN(queue_node);
5513
5514                 struct lro_ctrl *lro = &rxr->lro;
5515
5521                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5522                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5523                                 ixgbe_sysctl_rdh_handler, "IU",
5524                                 "Receive Descriptor Head");
5525                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5526                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5527                                 ixgbe_sysctl_rdt_handler, "IU",
5528                                 "Receive Descriptor Tail");
5529                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5530                                 CTLFLAG_RD, &rxr->rx_packets,
5531                                 "Queue Packets Received");
5532                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5533                                 CTLFLAG_RD, &rxr->rx_bytes,
5534                                 "Queue Bytes Received");
5535                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
5536                                 CTLFLAG_RD, &rxr->rx_copies,
5537                                 "Copied RX Frames");
5538                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5539                                 CTLFLAG_RD, &lro->lro_queued, 0,
5540                                 "LRO Queued");
5541                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5542                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5543                                 "LRO Flushed");
5544         }
5545
5546         /* MAC stats get their own sub node */
5547
5548         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5549                                     CTLFLAG_RD, NULL, "MAC Statistics");
5550         stat_list = SYSCTL_CHILDREN(stat_node);
5551
5552         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5553                         CTLFLAG_RD, &stats->crcerrs,
5554                         "CRC Errors");
5555         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5556                         CTLFLAG_RD, &stats->illerrc,
5557                         "Illegal Byte Errors");
5558         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5559                         CTLFLAG_RD, &stats->errbc,
5560                         "Byte Errors");
5561         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5562                         CTLFLAG_RD, &stats->mspdc,
5563                         "MAC Short Packets Discarded");
5564         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5565                         CTLFLAG_RD, &stats->mlfc,
5566                         "MAC Local Faults");
5567         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5568                         CTLFLAG_RD, &stats->mrfc,
5569                         "MAC Remote Faults");
5570         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5571                         CTLFLAG_RD, &stats->rlec,
5572                         "Receive Length Errors");
5573
5574         /* Flow Control stats */
5575         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5576                         CTLFLAG_RD, &stats->lxontxc,
5577                         "Link XON Transmitted");
5578         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5579                         CTLFLAG_RD, &stats->lxonrxc,
5580                         "Link XON Received");
5581         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5582                         CTLFLAG_RD, &stats->lxofftxc,
5583                         "Link XOFF Transmitted");
5584         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5585                         CTLFLAG_RD, &stats->lxoffrxc,
5586                         "Link XOFF Received");
5587
5588         /* Packet Reception Stats */
5589         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5590                         CTLFLAG_RD, &stats->tor, 
5591                         "Total Octets Received"); 
5592         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5593                         CTLFLAG_RD, &stats->gorc, 
5594                         "Good Octets Received"); 
5595         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5596                         CTLFLAG_RD, &stats->tpr,
5597                         "Total Packets Received");
5598         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5599                         CTLFLAG_RD, &stats->gprc,
5600                         "Good Packets Received");
5601         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5602                         CTLFLAG_RD, &stats->mprc,
5603                         "Multicast Packets Received");
5604         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5605                         CTLFLAG_RD, &stats->bprc,
5606                         "Broadcast Packets Received");
5607         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5608                         CTLFLAG_RD, &stats->prc64,
5609                         "64 byte frames received");
5610         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5611                         CTLFLAG_RD, &stats->prc127,
5612                         "65-127 byte frames received");
5613         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5614                         CTLFLAG_RD, &stats->prc255,
5615                         "128-255 byte frames received");
5616         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5617                         CTLFLAG_RD, &stats->prc511,
5618                         "256-511 byte frames received");
5619         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5620                         CTLFLAG_RD, &stats->prc1023,
5621                         "512-1023 byte frames received");
5622         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5623                         CTLFLAG_RD, &stats->prc1522,
5624                         "1024-1522 byte frames received");
5625         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5626                         CTLFLAG_RD, &stats->ruc,
5627                         "Receive Undersized");
5628         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5629                         CTLFLAG_RD, &stats->rfc,
5630                         "Fragmented Packets Received");
5631         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5632                         CTLFLAG_RD, &stats->roc,
5633                         "Oversized Packets Received");
5634         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5635                         CTLFLAG_RD, &stats->rjc,
5636                         "Received Jabber");
5637         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5638                         CTLFLAG_RD, &stats->mngprc,
5639                         "Management Packets Received");
5640         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5641                         CTLFLAG_RD, &stats->mngpdc,
5642                         "Management Packets Dropped");
5643         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5644                         CTLFLAG_RD, &stats->xec,
5645                         "Checksum Errors");
5646
5647         /* Packet Transmission Stats */
5648         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5649                         CTLFLAG_RD, &stats->gotc, 
5650                         "Good Octets Transmitted"); 
5651         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5652                         CTLFLAG_RD, &stats->tpt,
5653                         "Total Packets Transmitted");
5654         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5655                         CTLFLAG_RD, &stats->gptc,
5656                         "Good Packets Transmitted");
5657         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5658                         CTLFLAG_RD, &stats->bptc,
5659                         "Broadcast Packets Transmitted");
5660         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5661                         CTLFLAG_RD, &stats->mptc,
5662                         "Multicast Packets Transmitted");
5663         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5664                         CTLFLAG_RD, &stats->mngptc,
5665                         "Management Packets Transmitted");
5666         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5667                         CTLFLAG_RD, &stats->ptc64,
5668                         "64 byte frames transmitted");
5669         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5670                         CTLFLAG_RD, &stats->ptc127,
5671                         "65-127 byte frames transmitted");
5672         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5673                         CTLFLAG_RD, &stats->ptc255,
5674                         "128-255 byte frames transmitted");
5675         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5676                         CTLFLAG_RD, &stats->ptc511,
5677                         "256-511 byte frames transmitted");
5678         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5679                         CTLFLAG_RD, &stats->ptc1023,
5680                         "512-1023 byte frames transmitted");
5681         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5682                         CTLFLAG_RD, &stats->ptc1522,
5683                         "1024-1522 byte frames transmitted");
5684 }
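/*
** Example (illustrative sketch, not part of the driver): reading one
** of the per-queue counters registered above from userland with
** sysctlbyname(3). The OID path "dev.ix.0.queue0.tx_packets" assumes
** device unit 0 and queue 0 -- adjust both for the system at hand.
*/
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t pkts;
	size_t len = sizeof(pkts);

	/* "tx_packets" is the UQUAD attached to each queue node above. */
	if (sysctlbyname("dev.ix.0.queue0.tx_packets",
	    &pkts, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("queue0 tx_packets: %ju\n", (uintmax_t)pkts);
	return (0);
}
#endif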
5685
5686 /*
5687 ** Set flow control using sysctl:
5688 ** Flow control values:
5689 **      0 - off
5690 **      1 - rx pause
5691 **      2 - tx pause
5692 **      3 - full
5693 */
5694 static int
5695 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5696 {
5697         int error, last;
5698         struct adapter *adapter = (struct adapter *) arg1;
5699
5700         last = adapter->fc;
5701         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5702         if ((error) || (req->newptr == NULL))
5703                 return (error);
5704
5705         /* Don't bother if it's not changed */
5706         if (adapter->fc == last)
5707                 return (0);
5708
5709         switch (adapter->fc) {
5710                 case ixgbe_fc_rx_pause:
5711                 case ixgbe_fc_tx_pause:
5712                 case ixgbe_fc_full:
5713                         adapter->hw.fc.requested_mode = adapter->fc;
5714                         if (adapter->num_queues > 1)
5715                                 ixgbe_disable_rx_drop(adapter);
5716                         break;
5717                 case ixgbe_fc_none:
5718                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5719                         if (adapter->num_queues > 1)
5720                                 ixgbe_enable_rx_drop(adapter);
5721                         break;
5722                 default:
5723                         adapter->fc = last;
5724                         return (EINVAL);
5725         }
5726         /* Don't autoneg if forcing a value */
5727         adapter->hw.fc.disable_fc_autoneg = TRUE;
5728         ixgbe_fc_enable(&adapter->hw);
5729         return (error);
5730 }
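/*
** Example (illustrative sketch, not part of the driver): driving the
** handler above from userland. Writing an int in the range 0-3
** selects off / rx pause / tx pause / full. The OID "dev.ix.0.fc"
** assumes unit 0 and that the handler is attached under the name
** "fc"; both are assumptions here. Writing requires privileges.
*/
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int fc = 3;	/* 3 == full flow control */

	if (sysctlbyname("dev.ix.0.fc", NULL, NULL,
	    &fc, sizeof(fc)) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	return (0);
}
#endif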
5731
5732 /*
5733 ** Control link advertise speed:
5734 **      1 - advertise only 1G
5735 **      2 - advertise 100Mb
5736 **      3 - advertise normal (1G and 10G)
5737 */
5738 static int
5739 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5740 {
5741         int                     error = 0;
5742         struct adapter          *adapter;
5743         device_t                dev;
5744         struct ixgbe_hw         *hw;
5745         ixgbe_link_speed        speed, last;
5746
5747         adapter = (struct adapter *) arg1;
5748         dev = adapter->dev;
5749         hw = &adapter->hw;
5750         last = adapter->advertise;
5751
5752         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5753         if ((error) || (req->newptr == NULL))
5754                 return (error);
5755
5756         if (adapter->advertise == last) /* no change */
5757                 return (0);
5758
5759         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5760             (hw->phy.multispeed_fiber)))
5761                 return (EINVAL);
5762
5763         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5764                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5765                 return (EINVAL);
5766         }
5767
5768         if (adapter->advertise == 1)
5769                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5770         else if (adapter->advertise == 2)
5771                 speed = IXGBE_LINK_SPEED_100_FULL;
5772         else if (adapter->advertise == 3)
5773                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5774                         IXGBE_LINK_SPEED_10GB_FULL;
5775         else {  /* bogus value */
5776                 adapter->advertise = last;
5777                 return (EINVAL);
5778         }
5779
5780         hw->mac.autotry_restart = TRUE;
5781         hw->mac.ops.setup_link(hw, speed, TRUE);
5782
5783         return (error);
5784 }
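/*
** The if/else chain above maps the sysctl value to a link speed
** mask. The same mapping can be factored into a small lookup helper;
** a minimal sketch follows (illustrative only, not part of the
** driver -- ixgbe_advertise_to_speed is a hypothetical name, and the
** X540-only 100Mb check above would still be done by the caller).
*/
#if 0
static int
ixgbe_advertise_to_speed(int advertise, ixgbe_link_speed *speed)
{
	static const ixgbe_link_speed map[] = {
		[1] = IXGBE_LINK_SPEED_1GB_FULL,
		[2] = IXGBE_LINK_SPEED_100_FULL,
		[3] = IXGBE_LINK_SPEED_1GB_FULL |
		      IXGBE_LINK_SPEED_10GB_FULL,
	};

	/* Reject bogus values, mirroring the handler above. */
	if (advertise < 1 || advertise > 3)
		return (EINVAL);
	*speed = map[advertise];
	return (0);
}
#endif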
5785
5786 /*
5787 ** Thermal Shutdown Trigger
5788 **   - cause a Thermal Overtemp IRQ
5789 **   - triggering it now requires the feature to be enabled by firmware
5790 */
5791 static int
5792 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5793 {
5794         int             error, fire = 0;
5795         struct adapter  *adapter = (struct adapter *) arg1;
5796         struct ixgbe_hw *hw = &adapter->hw;
5797
5798
5800                 return (0);
5801
5802         error = sysctl_handle_int(oidp, &fire, 0, req);
5803         if ((error) || (req->newptr == NULL))
5804                 return (error);
5805
5806         if (fire) {
5807                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5808                 reg |= IXGBE_EICR_TS;
5809                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5810         }
5811
5812         return (0);
5813 }
5814
5815 /*
5816 ** Enable the hardware to drop packets when the buffer is
5817 ** full. This is useful in multiqueue operation, so that no
5818 ** single full queue stalls the entire RX engine. We only
5819 ** enable this when multiqueue is in use AND flow control
5820 ** is disabled.
5821 */
5822 static void
5823 ixgbe_enable_rx_drop(struct adapter *adapter)
5824 {
5825         struct ixgbe_hw *hw = &adapter->hw;
5826
5827         for (int i = 0; i < adapter->num_queues; i++) {
5828                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5829                 srrctl |= IXGBE_SRRCTL_DROP_EN;
5830                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5831         }
5832 }
5833
5834 static void
5835 ixgbe_disable_rx_drop(struct adapter *adapter)
5836 {
5837         struct ixgbe_hw *hw = &adapter->hw;
5838
5839         for (int i = 0; i < adapter->num_queues; i++) {
5840                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5841                 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
5842                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5843         }
5844 }
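/*
** The two helpers above differ only in whether SRRCTL's DROP_EN bit
** is set or cleared; the common read-modify-write pattern can be
** factored into one routine. A minimal sketch (illustrative only,
** not part of the driver -- ixgbe_set_rx_drop is a hypothetical
** name):
*/
#if 0
static void
ixgbe_set_rx_drop(struct adapter *adapter, int enable)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		/* Read-modify-write each queue's SRRCTL register. */
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));

		if (enable)
			srrctl |= IXGBE_SRRCTL_DROP_EN;
		else
			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}
#endif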