/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "ixgbe.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char ixgbe_driver_version[] = "2.5.15";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static char    *ixgbe_strings[] = {
        "Intel(R) PRO/10GbE PCI-Express Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t);
static int      ixgbe_attach(device_t);
static int      ixgbe_detach(device_t);
static int      ixgbe_shutdown(device_t);
#ifdef IXGBE_LEGACY_TX
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#else /* ! IXGBE_LEGACY_TX */
static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int      ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     ixgbe_qflush(struct ifnet *);
static void     ixgbe_deferred_mq_start(void *, int);
#endif /* IXGBE_LEGACY_TX */
static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
static void     ixgbe_init(void *);
static void     ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *);
static void     ixgbe_get_slot_info(struct ixgbe_hw *);
static int      ixgbe_allocate_msix(struct adapter *);
static int      ixgbe_allocate_legacy(struct adapter *);
static int      ixgbe_allocate_queues(struct adapter *);
static int      ixgbe_setup_msix(struct adapter *);
static void     ixgbe_free_pci_resources(struct adapter *);
static void     ixgbe_local_timer(void *);
static int      ixgbe_setup_interface(device_t, struct adapter *);
static void     ixgbe_config_link(struct adapter *);

static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int      ixgbe_setup_transmit_structures(struct adapter *);
static void     ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int      ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
static void     ixgbe_setup_hw_rsc(struct rx_ring *);

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void     ixgbe_txeof(struct tx_ring *);
static bool     ixgbe_rxeof(struct ix_queue *);
static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
                    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static int      ixgbe_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      ixgbe_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void     ixgbe_configure_ivars(struct adapter *);
static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void     ixgbe_setup_vlan_hw_support(struct adapter *);
static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                    struct mbuf *, u32);

static void     ixgbe_enable_rx_drop(struct adapter *);
static void     ixgbe_disable_rx_drop(struct adapter *);

/* Support for pluggable optic modules */
static bool     ixgbe_sfp_probe(struct adapter *);
static void     ixgbe_setup_optics(struct adapter *);

/* Legacy (single vector) interrupt handler */
static void     ixgbe_legacy_irq(void *);

/* The MSI/X Interrupt handlers */
static void     ixgbe_msix_que(void *);
static void     ixgbe_msix_link(void *);

/* Deferred interrupt tasklets */
static void     ixgbe_handle_que(void *, int);
static void     ixgbe_handle_link(void *, int);
static void     ixgbe_handle_msf(void *, int);
static void     ixgbe_handle_mod(void *, int);

#ifdef IXGBE_FDIR
static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
static void     ixgbe_reinit_fdir(void *, int);
#endif

/* Missing shared code prototype */
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ixgbe_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, ixgbe_probe),
        DEVMETHOD(device_attach, ixgbe_attach),
        DEVMETHOD(device_detach, ixgbe_detach),
        DEVMETHOD(device_shutdown, ixgbe_shutdown),
        DEVMETHOD_END
};

static driver_t ixgbe_driver = {
        "ix", ixgbe_methods, sizeof(struct adapter),
};

devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);

/*
** TUNABLE PARAMETERS:
*/

static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
                   "IXGBE driver parameters");

/*
** AIM: Adaptive Interrupt Moderation,
** which means the interrupt rate is
** varied over time based on the
** traffic for that interrupt vector.
*/
static int ixgbe_enable_aim = TRUE;
TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RW, &ixgbe_enable_aim, 0,
    "Enable adaptive interrupt moderation");

static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
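
/*
 * Illustrative usage, not part of the driver: oids registered with
 * CTLFLAG_RDTUN, such as hw.ix.max_interrupt_rate above, are read-only
 * at runtime and take their value from the matching loader tunable in
 * /boot/loader.conf, e.g.
 *
 *      hw.ixgbe.max_interrupt_rate="31250"
 *
 * while CTLFLAG_RW oids such as hw.ix.enable_aim can also be flipped
 * on a live system with sysctl(8):
 *
 *      # sysctl hw.ix.enable_aim=0
 */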

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &ixgbe_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, "
    "-1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &ixgbe_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, "
    "-1 means unlimited");

/*
** Smart speed setting, default to on.
** This only works as a compile-time
** option right now, since it is used
** during attach; set this to
** 'ixgbe_smart_speed_off' to disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
 * Number of Queues: can be set to 0,
 * in which case it autoconfigures to
 * the number of CPUs, with a max of 8.
 * It can be overridden manually here.
 */
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Number of TX descriptors per ring;
** set higher than RX as this seems
** the better performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
    "Number of transmit descriptors per queue");

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
    "Number of receive descriptors per queue");

/*
** Turning this on allows the use of
** unsupported SFP+ modules; note that
** in doing so you are on your own :)
*/
static int allow_unsupported_sfp = FALSE;
TUNABLE_INT("hw.ixgbe.unsupported_sfp", &allow_unsupported_sfp);

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  It also causes IP forwarding to
**  fail, and unlike LRO that cannot
**  be controlled by the stack. For
**  all these reasons it is best left
**  off, with no tunable interface;
**  enabling it requires recompiling.
*/
static bool ixgbe_rsc_enable = FALSE;

/* Keep a running tab of ports for sanity checking */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** sampling rate of TX packets for the
** filter pool; at the default of 20,
** every 20th packet is probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool; this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
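/*
 * Worked example (illustrative): the default fdir_pballoc = 1 selects
 * the 128K filter pool per the table above, and ixgbe_init_locked()
 * later derives the packet buffer headroom it hands to
 * hw->mac.ops.setup_rxpba() from the same knob, as 32 << fdir_pballoc.
 */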
#endif

#ifdef DEV_NETMAP
/*
 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
 * be a reference on how to implement netmap support in a driver.
 * Additional comments are in ixgbe_netmap.h .
 *
 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
 * that extend the standard driver.
 */
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines whether the driver should be loaded on
 *  an adapter by matching the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
ixgbe_probe(device_t dev)
{
        ixgbe_vendor_info_t *ent;

        u16     pci_vendor_id = 0;
        u16     pci_device_id = 0;
        u16     pci_subvendor_id = 0;
        u16     pci_subdevice_id = 0;
        char    adapter_name[256];

        INIT_DEBUGOUT("ixgbe_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = ixgbe_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                     (ent->subvendor_id == 0)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                     (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                ixgbe_strings[ent->index],
                                ixgbe_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        ++ixgbe_total_ports;
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_attach(device_t dev)
{
        struct adapter *adapter;
        struct ixgbe_hw *hw;
        int             error = 0;
        u16             csum;
        u32             ctrl_ext;

        INIT_DEBUGOUT("ixgbe_attach: begin");

        /* Allocate, clear, and link in our adapter structure */
        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;

        /* Core Lock Init */
        IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL APIs */

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
                        &ixgbe_enable_aim, 1, "Interrupt Moderation");

        /*
        ** Allow a kind of speed control by forcing the autoneg
        ** advertised speed list to only a certain value; this
        ** supports 1G on 82599 devices and 100Mb on X540.
        */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_advertise, "I", "Link Speed");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
                        0, ixgbe_set_thermal_test, "I", "Thermal Test");

        /* Set up the timer callout */
        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware revision */
        ixgbe_identify_hardware(adapter);

        /* Do base PCI setup - map BAR0 */
        if (ixgbe_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_out;
        }

        /* Do descriptor calc and sanity checks */
        if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
                device_printf(dev, "TXD config issue, using default!\n");
                adapter->num_tx_desc = DEFAULT_TXD;
        } else
                adapter->num_tx_desc = ixgbe_txd;
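
        /*
         * A quick check of the math above (illustrative, assuming the
         * usual 16-byte advanced descriptor and 128-byte DBA_ALIGN):
         * any count that is a multiple of 8 keeps the ring aligned,
         * e.g. 1024 * 16 = 16 KB divides evenly by 128, while a count
         * such as 1030 (16480 bytes) does not, and would fall back to
         * DEFAULT_TXD just like an out-of-range MIN_TXD/MAX_TXD value.
         */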

        /*
        ** With many RX rings it is easy to exceed the
        ** system mbuf allocation. Tuning nmbclusters
        ** can alleviate this.
        */
        if (nmbclusters > 0) {
                int s;
                s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
                if (s > nmbclusters) {
                        device_printf(dev, "RX Descriptors exceed "
                            "system mbuf max, using default instead!\n");
                        ixgbe_rxd = DEFAULT_RXD;
                }
        }

        if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
                device_printf(dev, "RXD config issue, using default!\n");
                adapter->num_rx_desc = DEFAULT_RXD;
        } else
                adapter->num_rx_desc = ixgbe_rxd;

        /* Allocate our TX/RX Queues */
        if (ixgbe_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_out;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Cannot allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Initialize the shared code */
        hw->allow_unsupported_sfp = allow_unsupported_sfp;
        error = ixgbe_init_shared_code(hw);
        if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
                /*
                ** No optics in this port, set up
                ** so the timer routine will probe
                ** for later insertion.
                */
                adapter->sfp_probe = TRUE;
                error = 0;
        } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                device_printf(dev, "Unsupported SFP+ module detected!\n");
                error = EIO;
                goto err_late;
        } else if (error) {
                device_printf(dev, "Unable to initialize the shared code\n");
                error = EIO;
                goto err_late;
        }

        /* Make sure we have a good EEPROM before we read from it */
        if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
                device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
                error = EIO;
                goto err_late;
        }

        error = ixgbe_init_hw(hw);
        switch (error) {
        case IXGBE_ERR_EEPROM_VERSION:
                device_printf(dev, "This device is a pre-production adapter/"
                    "LOM.  Please be aware there may be issues associated "
                    "with your hardware.\n If you are experiencing problems "
                    "please contact your Intel or hardware representative "
                    "who provided you with this hardware.\n");
                break;
        case IXGBE_ERR_SFP_NOT_SUPPORTED:
                device_printf(dev, "Unsupported SFP+ Module\n");
                error = EIO;
                goto err_late;
        case IXGBE_ERR_SFP_NOT_PRESENT:
                device_printf(dev, "No SFP+ Module found\n");
                /* falls thru */
        default:
                break;
        }

        /* Detect and set physical type */
        ixgbe_setup_optics(adapter);

        if ((adapter->msix > 1) && (ixgbe_enable_msix))
                error = ixgbe_allocate_msix(adapter);
        else
                error = ixgbe_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        if (ixgbe_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Initialize statistics */
        ixgbe_update_stats_counters(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /*
        ** Check PCIE slot type/speed/width
        */
        ixgbe_get_slot_info(hw);

        /* Set an initial default flow control value */
        adapter->fc = ixgbe_fc_full;

        /* let hardware know driver is loaded */
        ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
        ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

        ixgbe_add_hw_stats(adapter);

#ifdef DEV_NETMAP
        ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("ixgbe_attach: end");
        return (0);
err_late:
        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
err_out:
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        ixgbe_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_detach(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ix_queue *que = adapter->queues;
        struct tx_ring *txr = adapter->tx_rings;
        u32     ctrl_ext;

        INIT_DEBUGOUT("ixgbe_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);

        for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
                if (que->tq) {
#ifndef IXGBE_LEGACY_TX
                        taskqueue_drain(que->tq, &txr->txq_task);
#endif
                        taskqueue_drain(que->tq, &que->que_task);
                        taskqueue_free(que->tq);
                }
        }

        /* Drain the Link queue */
        if (adapter->tq) {
                taskqueue_drain(adapter->tq, &adapter->link_task);
                taskqueue_drain(adapter->tq, &adapter->mod_task);
                taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
                taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
                taskqueue_free(adapter->tq);
        }

        /* let hardware know driver is unloading */
        ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
        ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);
#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        ixgbe_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(adapter->ifp);

        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
        free(adapter->mta, M_DEVBUF);

        IXGBE_CORE_LOCK_DESTROY(adapter);
        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
ixgbe_shutdown(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return (0);
}


#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct mbuf    *m_head;
        struct adapter *adapter = txr->adapter;

        IXGBE_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                return;
        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
                        break;

                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;

                if (ixgbe_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status = IXGBE_QUEUE_WORKING;
        }
        return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IXGBE_TX_LOCK(txr);
                ixgbe_start_locked(txr, ifp);
                IXGBE_TX_UNLOCK(txr);
        }
        return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
**
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ix_queue *que;
        struct tx_ring  *txr;
        int             i, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IXGBE_TX_TRYLOCK(txr)) {
                err = ixgbe_mq_start_locked(ifp, txr);
                IXGBE_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (err);
}
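
/*
 * Example of the ring selection above (illustrative only): with
 * adapter->num_queues = 4, an mbuf stamped M_FLOWID carrying
 * m_pkthdr.flowid = 0x2657 always lands on ring 0x2657 % 4 = 3, so
 * packets of one flow stay ordered on one ring; unstamped traffic
 * simply spreads by sending CPU (curcpu % 4).
 */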

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             enqueued = 0, err = 0;

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
#if __FreeBSD_version < 901504
        next = drbr_dequeue(ifp, txr->br);
        while (next != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
#else
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                drbr_advance(ifp, txr->br);
                        } else {
                                drbr_putback(ifp, txr->br, next);
                        }
#endif
                        break;
                }
#if __FreeBSD_version >= 901504
                drbr_advance(ifp, txr->br);
#endif
                enqueued++;
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
#if __FreeBSD_version < 901504
                next = drbr_dequeue(ifp, txr->br);
#endif
        }

        if (enqueued > 0) {
                /* Set watchdog on */
                txr->queue_status = IXGBE_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
                ixgbe_txeof(txr);

        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
ixgbe_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IXGBE_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                ixgbe_mq_start_locked(ifp, txr);
        IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IXGBE_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IXGBE_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ixgbe_hw *hw = &adapter->hw;
        struct ifreq    *ifr = (struct ifreq *) data;
#if defined(INET) || defined(INET6)
        struct ifaddr *ifa = (struct ifaddr *)data;
        bool            avoid_reset = FALSE;
#endif
        int             error = 0;

        switch (command) {

        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
#if defined(INET) || defined(INET6)
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                ixgbe_init(adapter);
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
                        error = ether_ioctl(ifp, command, data);
#endif
                break;
        case SIOCSIFMTU:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
                if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
                        error = EINVAL;
                } else {
                        IXGBE_CORE_LOCK(adapter);
                        ifp->if_mtu = ifr->ifr_mtu;
                        adapter->max_frame_size =
                                ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
                IXGBE_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        ixgbe_set_promisc(adapter);
                                }
                        } else
                                ixgbe_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                ixgbe_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IXGBE_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_disable_intr(adapter);
                        ixgbe_set_multi(adapter);
                        ixgbe_enable_intr(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
        {
                int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
                if (mask & IFCAP_HWCSUM)
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                if (mask & IFCAP_TSO4)
                        ifp->if_capenable ^= IFCAP_TSO4;
                if (mask & IFCAP_TSO6)
                        ifp->if_capenable ^= IFCAP_TSO6;
                if (mask & IFCAP_LRO)
                        ifp->if_capenable ^= IFCAP_LRO;
                if (mask & IFCAP_VLAN_HWTAGGING)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                if (mask & IFCAP_VLAN_HWFILTER)
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                if (mask & IFCAP_VLAN_HWTSO)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                VLAN_CAPABILITIES(ifp);
                break;
        }
        case SIOCGI2C:
        {
                struct ixgbe_i2c_req    i2c;
                IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
                error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
                if (error)
                        break;
                /* Accept only the two valid SFP device addresses */
                if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
                        error = EINVAL;
                        break;
                }
                hw->phy.ops.read_i2c_byte(hw, i2c.offset,
                    i2c.dev_addr, i2c.data);
                error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
                break;
        }
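        /*
         * Background for the dev_addr check above (per SFF-8472): an
         * SFP+ module exposes its serial ID EEPROM at I2C address 0xA0
         * and its diagnostic page at 0xA2, which is why only those two
         * device addresses are accepted.
         */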
        default:
                IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get the hardware to a consistent state.
 *
 **********************************************************************/
#define IXGBE_MHADD_MFS_SHIFT 16

static void
ixgbe_init_locked(struct adapter *adapter)
{
        struct ifnet   *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        struct ixgbe_hw *hw = &adapter->hw;
        u32             k, txdctl, mhadd, gpie;
        u32             rxdctl, rxctrl;

        mtx_assert(&adapter->core_mtx, MA_OWNED);
        INIT_DEBUGOUT("ixgbe_init_locked: begin");
        hw->adapter_stopped = FALSE;
        ixgbe_stop_adapter(hw);
        callout_stop(&adapter->timer);

        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
              IXGBE_ETH_LENGTH_OF_ADDRESS);
        ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
        hw->addr_ctrl.rar_used_count = 1;

        /* Set the various hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (hw->mac.type != ixgbe_mac_82598EB)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        /* Prepare transmit descriptors and buffers */
        if (ixgbe_setup_transmit_structures(adapter)) {
                device_printf(dev, "Could not setup transmit structures\n");
                ixgbe_stop(adapter);
                return;
        }

        ixgbe_init_hw(hw);
        ixgbe_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        ixgbe_set_multi(adapter);

        /*
        ** Determine the correct mbuf pool
        ** for doing jumbo frames
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else if (adapter->max_frame_size <= 9216)
                adapter->rx_mbuf_sz = MJUM9BYTES;
        else
                adapter->rx_mbuf_sz = MJUM16BYTES;
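
        /*
         * For example (illustrative): a 9000-byte jumbo MTU gives
         * max_frame_size = 9000 + 14 (Ethernet header) + 4 (CRC) =
         * 9018, which falls through to the 9 KB (MJUM9BYTES) cluster
         * pool above.
         */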

        /* Prepare receive descriptors and buffers */
        if (ixgbe_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                ixgbe_stop(adapter);
                return;
        }

        /* Configure RX settings */
        ixgbe_initialize_receive_units(adapter);

        gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

        /* Enable Fan Failure Interrupt */
        gpie |= IXGBE_SDP1_GPIEN;

        /* Add for Module detection */
        if (hw->mac.type == ixgbe_mac_82599EB)
                gpie |= IXGBE_SDP2_GPIEN;

        /* Thermal Failure Detection */
        if (hw->mac.type == ixgbe_mac_X540)
                gpie |= IXGBE_SDP0_GPIEN;

        if (adapter->msix > 1) {
                /* Enable Enhanced MSIX mode */
                gpie |= IXGBE_GPIE_MSIX_MODE;
                gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
                    IXGBE_GPIE_OCD;
        }
        IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

        /* Set MTU size */
        if (ifp->if_mtu > ETHERMTU) {
                mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
                mhadd &= ~IXGBE_MHADD_MFS_MASK;
                mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
                IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
        }

        /* Now enable all the queues */

        for (int i = 0; i < adapter->num_queues; i++) {
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                /* Set WTHRESH to 8, burst writeback */
                txdctl |= (8 << 16);
                /*
                 * When the internal queue falls below PTHRESH (32),
                 * start prefetching as long as there are at least
                 * HTHRESH (1) buffers ready. The values are taken
                 * from the Intel linux driver 3.8.21.
                 * Prefetching enables tx line rate even with 1 queue.
                 */
                txdctl |= (32 << 0) | (1 << 8);
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
        }

        for (int i = 0; i < adapter->num_queues; i++) {
                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
                if (hw->mac.type == ixgbe_mac_82598EB) {
                        /*
                        ** 0x080420 encodes:
                        ** PTHRESH = 32 (0x20)
                        ** HTHRESH = 4
                        ** WTHRESH = 8
                        */
                        rxdctl &= ~0x3FFFFF;
                        rxdctl |= 0x080420;
                }
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
                for (k = 0; k < 10; k++) {
                        if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
                            IXGBE_RXDCTL_ENABLE)
                                break;
                        else
                                msec_delay(1);
                }
                wmb();
#ifdef DEV_NETMAP
                /*
                 * In netmap mode, we must preserve the buffers made
                 * available to userspace before the if_init()
                 * (this is true by default on the TX side, because
                 * init makes all buffers available to userspace).
                 *
                 * netmap_reset() and the device specific routines
                 * (e.g. ixgbe_setup_receive_rings()) map these
                 * buffers at the end of the NIC ring, so here we
                 * must set the RDT (tail) register to make sure
                 * they are not overwritten.
                 *
                 * In this driver the NIC ring starts at RDH = 0,
                 * RDT points to the last slot available for reception (?),
                 * so RDT = num_rx_desc - 1 means the whole ring is available.
                 */
                if (ifp->if_capenable & IFCAP_NETMAP) {
                        struct netmap_adapter *na = NA(adapter->ifp);
                        struct netmap_kring *kring = &na->rx_rings[i];
                        int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);

                        IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
                } else
#endif /* DEV_NETMAP */
                IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
        }

        /* Set up VLAN support and filter */
        ixgbe_setup_vlan_hw_support(adapter);

        /* Enable Receive engine */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
        if (hw->mac.type == ixgbe_mac_82598EB)
                rxctrl |= IXGBE_RXCTRL_DMBYPS;
        rxctrl |= IXGBE_RXCTRL_RXEN;
        ixgbe_enable_rx_dma(hw, rxctrl);

        callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

        /* Set up MSI/X routing */
        if (ixgbe_enable_msix) {
                ixgbe_configure_ivars(adapter);
                /* Set up auto-mask */
                if (hw->mac.type == ixgbe_mac_82598EB)
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
                else {
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
                }
        } else {  /* Simple settings for Legacy/MSI */
                ixgbe_set_ivar(adapter, 0, 0, 0);
                ixgbe_set_ivar(adapter, 0, 0, 1);
                IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
        }

#ifdef IXGBE_FDIR
        /* Init Flow director */
        if (hw->mac.type != ixgbe_mac_82598EB) {
                u32 hdrm = 32 << fdir_pballoc;

                hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
                ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
        }
#endif

        /*
        ** Check on any SFP devices that
        ** need to be kick-started
        */
        if (hw->phy.type == ixgbe_phy_none) {
                int err = hw->phy.ops.identify(hw);
                if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                        device_printf(dev,
                            "Unsupported SFP+ module type was detected.\n");
                        return;
                }
        }

        /* Set moderation on the Link interrupt */
        IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);

        /* Config/Enable Link */
        ixgbe_config_link(adapter);

        /* Hardware Packet Buffer & Flow Control setup */
        {
                u32 rxpb, frame, size, tmp;

                frame = adapter->max_frame_size;

                /* Calculate High Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_DV_X540(frame, frame);
                else
                        tmp = IXGBE_DV(frame, frame);
                size = IXGBE_BT2KB(tmp);
                rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
                hw->fc.high_water[0] = rxpb - size;

                /* Now calculate Low Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_LOW_DV_X540(frame);
                else
                        tmp = IXGBE_LOW_DV(frame);
                hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

                hw->fc.requested_mode = adapter->fc;
                hw->fc.pause_time = IXGBE_FC_PAUSE;
                hw->fc.send_xon = TRUE;
        }
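        /*
         * Reading the block above symbolically (a sketch, not new
         * logic): IXGBE_DV()/IXGBE_DV_X540() estimate the worst-case
         * delay value, in bits, in flight for the given frame size,
         * IXGBE_BT2KB() converts bits to KB, and RXPBSIZE(0) >> 10 is
         * the RX packet buffer size in KB; so the high water mark is
         * the buffer space left after reserving that delay, and the
         * low water mark is the point at which an XON is sent again.
         */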
        /* Initialize the FC settings */
        ixgbe_start_hw(hw);

        /* And now turn on interrupts */
        ixgbe_enable_intr(adapter);

        /* Now inform the stack we're ready */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;

        return;
}

static void
ixgbe_init(void *arg)
{
        struct adapter *adapter = arg;

        IXGBE_CORE_LOCK(adapter);
        ixgbe_init_locked(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return;
}


/*
**
** MSIX Interrupt Handlers and Tasklets
**
*/

static inline void
ixgbe_enable_queue(struct adapter *adapter, u32 vector)
{
        struct ixgbe_hw *hw = &adapter->hw;
        u64     queue = (u64)1 << vector;       /* 64-bit shift, not (1 << vector) widened */
        u32     mask;

        if (hw->mac.type == ixgbe_mac_82598EB) {
                mask = (IXGBE_EIMS_RTX_QUEUE & queue);
                IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
        } else {
                mask = (queue & 0xFFFFFFFF);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
                mask = (queue >> 32);
                if (mask)
                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
        }
}
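
/*
 * Worked example for the split above (illustrative): vector 35 gives
 * queue = 1ULL << 35; the low 32 bits are zero, so only EIMS_EX(1) is
 * written, with bit 35 - 32 = 3 set.  On 82598 there is a single EIMS
 * register and the vector is additionally masked down to the RTX
 * queue bits.
 */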
1388
1389 static inline void
1390 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1391 {
1392         struct ixgbe_hw *hw = &adapter->hw;
1393         u64     queue = (u64)1 << vector;       /* 64-bit shift: vector may be >= 32 */
1394         u32     mask;
1395
1396         if (hw->mac.type == ixgbe_mac_82598EB) {
1397                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1398                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1399         } else {
1400                 mask = (queue & 0xFFFFFFFF);
1401                 if (mask)
1402                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1403                 mask = (queue >> 32);
1404                 if (mask)
1405                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1406         }
1407 }
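/*
** Worked example of the mask math in the two routines above
** (illustrative only): for a hypothetical MSIX vector of 35,
**
**      queue              = 1ULL << 35 = 0x0000000800000000
**      queue & 0xFFFFFFFF = 0x00000000 -> EIMS_EX(0)/EIMC_EX(0) skipped
**      queue >> 32        = 0x00000008 -> bit 3 of EIMS_EX(1)/EIMC_EX(1)
**
** The 82598EB has only the single 32-bit EIMS/EIMC register, so the
** mask is clamped with IXGBE_EIMS_RTX_QUEUE and written there instead.
*/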
1408
1409 static void
1410 ixgbe_handle_que(void *context, int pending)
1411 {
1412         struct ix_queue *que = context;
1413         struct adapter  *adapter = que->adapter;
1414         struct tx_ring  *txr = que->txr;
1415         struct ifnet    *ifp = adapter->ifp;
1416         bool            more;
1417
1418         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1419                 more = ixgbe_rxeof(que);
1420                 IXGBE_TX_LOCK(txr);
1421                 ixgbe_txeof(txr);
1422 #ifndef IXGBE_LEGACY_TX
1423                 if (!drbr_empty(ifp, txr->br))
1424                         ixgbe_mq_start_locked(ifp, txr);
1425 #else
1426                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1427                         ixgbe_start_locked(txr, ifp);
1428 #endif
1429                 IXGBE_TX_UNLOCK(txr);
1430         }
1431
1432         /* Reenable this interrupt */
1433         if (que->res != NULL)
1434                 ixgbe_enable_queue(adapter, que->msix);
1435         else
1436                 ixgbe_enable_intr(adapter);
1437         return;
1438 }
1439
1440
1441 /*********************************************************************
1442  *
1443  *  Legacy Interrupt Service routine
1444  *
1445  **********************************************************************/
1446
1447 static void
1448 ixgbe_legacy_irq(void *arg)
1449 {
1450         struct ix_queue *que = arg;
1451         struct adapter  *adapter = que->adapter;
1452         struct ixgbe_hw *hw = &adapter->hw;
1453         struct ifnet    *ifp = adapter->ifp;
1454         struct          tx_ring *txr = adapter->tx_rings;
1455         bool            more;
1456         u32             reg_eicr;
1457
1458
1459         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1460
1461         ++que->irqs;
1462         if (reg_eicr == 0) {
1463                 ixgbe_enable_intr(adapter);
1464                 return;
1465         }
1466
1467         more = ixgbe_rxeof(que);
1468
1469         IXGBE_TX_LOCK(txr);
1470         ixgbe_txeof(txr);
1471 #ifdef IXGBE_LEGACY_TX
1472         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1473                 ixgbe_start_locked(txr, ifp);
1474 #else
1475         if (!drbr_empty(ifp, txr->br))
1476                 ixgbe_mq_start_locked(ifp, txr);
1477 #endif
1478         IXGBE_TX_UNLOCK(txr);
1479
1480         /* Check for fan failure */
1481         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1482             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1483                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1484                     "REPLACE IMMEDIATELY!!\n");
1485                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1486         }
1487
1488         /* Link status change */
1489         if (reg_eicr & IXGBE_EICR_LSC)
1490                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1491
1492         if (more)
1493                 taskqueue_enqueue(que->tq, &que->que_task);
1494         else
1495                 ixgbe_enable_intr(adapter);
1496         return;
1497 }
1498
1499
1500 /*********************************************************************
1501  *
1502  *  MSIX Queue Interrupt Service routine
1503  *
1504  **********************************************************************/
1505 void
1506 ixgbe_msix_que(void *arg)
1507 {
1508         struct ix_queue *que = arg;
1509         struct adapter  *adapter = que->adapter;
1510         struct ifnet    *ifp = adapter->ifp;
1511         struct tx_ring  *txr = que->txr;
1512         struct rx_ring  *rxr = que->rxr;
1513         bool            more;
1514         u32             newitr = 0;
1515
1516         /* Protect against spurious interrupts */
1517         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1518                 return;
1519
1520         ixgbe_disable_queue(adapter, que->msix);
1521         ++que->irqs;
1522
1523         more = ixgbe_rxeof(que);
1524
1525         IXGBE_TX_LOCK(txr);
1526         ixgbe_txeof(txr);
1527 #ifdef IXGBE_LEGACY_TX
1528         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1529                 ixgbe_start_locked(txr, ifp);
1530 #else
1531         if (!drbr_empty(ifp, txr->br))
1532                 ixgbe_mq_start_locked(ifp, txr);
1533 #endif
1534         IXGBE_TX_UNLOCK(txr);
1535
1536         /* Do AIM now? */
1537
1538         if (ixgbe_enable_aim == FALSE)
1539                 goto no_calc;
1540         /*
1541         ** Do Adaptive Interrupt Moderation:
1542         **  - Write out last calculated setting
1543         **  - Calculate based on average size over
1544         **    the last interval.
1545         */
1546         if (que->eitr_setting)
1547                 IXGBE_WRITE_REG(&adapter->hw,
1548                     IXGBE_EITR(que->msix), que->eitr_setting);
1549  
1550         que->eitr_setting = 0;
1551
1552         /* Idle, do nothing */
1553         if ((txr->bytes == 0) && (rxr->bytes == 0))
1554                 goto no_calc;
1555                                 
1556         if ((txr->bytes) && (txr->packets))
1557                 newitr = txr->bytes/txr->packets;
1558         if ((rxr->bytes) && (rxr->packets))
1559                 newitr = max(newitr,
1560                     (rxr->bytes / rxr->packets));
1561         newitr += 24; /* account for hardware frame, crc */
1562
1563         /* set an upper boundary */
1564         newitr = min(newitr, 3000);
1565
1566         /* Be nice to the mid range */
1567         if ((newitr > 300) && (newitr < 1200))
1568                 newitr = (newitr / 3);
1569         else
1570                 newitr = (newitr / 2);
1571
1572         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1573                 newitr |= newitr << 16;
1574         else
1575                 newitr |= IXGBE_EITR_CNT_WDIS;
1576                  
1577         /* save for next interrupt */
1578         que->eitr_setting = newitr;
1579
1580         /* Reset state */
1581         txr->bytes = 0;
1582         txr->packets = 0;
1583         rxr->bytes = 0;
1584         rxr->packets = 0;
1585
1586 no_calc:
1587         if (more)
1588                 taskqueue_enqueue(que->tq, &que->que_task);
1589         else
1590                 ixgbe_enable_queue(adapter, que->msix);
1591         return;
1592 }
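/*
** Worked example of the AIM calculation above (illustrative only):
** assume a hypothetical service interval in which this queue received
** 64 packets totalling 96000 bytes and transmitted nothing.
**
**      newitr = 96000 / 64      = 1500   (average bytes per packet)
**      newitr = 1500 + 24       = 1524   (hardware framing + CRC)
**      newitr = min(1524, 3000) = 1524   (upper bound)
**      1524 is outside (300,1200), so newitr = 1524 / 2 = 762
**
** The result is OR'd with IXGBE_EITR_CNT_WDIS on 82599/X540 and
** written to EITR on the next interrupt: larger average frames thus
** produce longer moderation intervals.
*/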
1593
1594
1595 static void
1596 ixgbe_msix_link(void *arg)
1597 {
1598         struct adapter  *adapter = arg;
1599         struct ixgbe_hw *hw = &adapter->hw;
1600         u32             reg_eicr;
1601
1602         ++adapter->link_irq;
1603
1604         /* First get the cause */
1605         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1606         /* Be sure the queue bits are not cleared */
1607         reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1608         /* Clear interrupt with write */
1609         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1610
1611         /* Link status change */
1612         if (reg_eicr & IXGBE_EICR_LSC)
1613                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1614
1615         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1616 #ifdef IXGBE_FDIR
1617                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1618                         /* This is probably overkill :) */
1619                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1620                                 return;
1621                         /* Disable the interrupt */
1622                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1623                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1624                 } else
1625 #endif
1626                 if (reg_eicr & IXGBE_EICR_ECC) {
1627                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1628                             "Please Reboot!!\n");
1629                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1630                 } else
1631
1632                 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1633                         /* Clear the interrupt */
1634                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1635                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1636                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1637                         /* Clear the interrupt */
1638                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1639                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1640                 }
1641         } 
1642
1643         /* Check for fan failure */
1644         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1645             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1646                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1647                     "REPLACE IMMEDIATELY!!\n");
1648                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1649         }
1650
1651         /* Check for over temp condition */
1652         if ((hw->mac.type == ixgbe_mac_X540) &&
1653             (reg_eicr & IXGBE_EICR_TS)) {
1654                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1655                     "PHY IS SHUT DOWN!!\n");
1656                 device_printf(adapter->dev, "System shutdown required\n");
1657                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1658         }
1659
1660         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1661         return;
1662 }
1663
1664 /*********************************************************************
1665  *
1666  *  Media Ioctl callback
1667  *
1668  *  This routine is called whenever the user queries the status of
1669  *  the interface using ifconfig.
1670  *
1671  **********************************************************************/
1672 static void
1673 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1674 {
1675         struct adapter *adapter = ifp->if_softc;
1676
1677         INIT_DEBUGOUT("ixgbe_media_status: begin");
1678         IXGBE_CORE_LOCK(adapter);
1679         ixgbe_update_link_status(adapter);
1680
1681         ifmr->ifm_status = IFM_AVALID;
1682         ifmr->ifm_active = IFM_ETHER;
1683
1684         if (!adapter->link_active) {
1685                 IXGBE_CORE_UNLOCK(adapter);
1686                 return;
1687         }
1688
1689         ifmr->ifm_status |= IFM_ACTIVE;
1690
1691         switch (adapter->link_speed) {
1692                 case IXGBE_LINK_SPEED_100_FULL:
1693                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1694                         break;
1695                 case IXGBE_LINK_SPEED_1GB_FULL:
1696                         ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1697                         break;
1698                 case IXGBE_LINK_SPEED_10GB_FULL:
1699                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1700                         break;
1701         }
1702
1703         IXGBE_CORE_UNLOCK(adapter);
1704
1705         return;
1706 }
1707
1708 /*********************************************************************
1709  *
1710  *  Media Ioctl callback
1711  *
1712  *  This routine is called when the user changes speed/duplex using
1713  *  the media/mediaopt option with ifconfig.
1714  *
1715  **********************************************************************/
1716 static int
1717 ixgbe_media_change(struct ifnet * ifp)
1718 {
1719         struct adapter *adapter = ifp->if_softc;
1720         struct ifmedia *ifm = &adapter->media;
1721
1722         INIT_DEBUGOUT("ixgbe_media_change: begin");
1723
1724         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1725                 return (EINVAL);
1726
1727         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1728         case IFM_AUTO:
1729                 adapter->hw.phy.autoneg_advertised =
1730                     IXGBE_LINK_SPEED_100_FULL |
1731                     IXGBE_LINK_SPEED_1GB_FULL |
1732                     IXGBE_LINK_SPEED_10GB_FULL;
1733                 break;
1734         default:
1735                 device_printf(adapter->dev, "Only auto media type\n");
1736                 return (EINVAL);
1737         }
1738
1739         return (0);
1740 }
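/*
** Example from userland (illustrative): since only autoselect is
** accepted, the one valid invocation is
**
**      ifconfig ix0 media autoselect
**
** and any fixed speed/duplex request fails with EINVAL.
*/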
1741
1742 /*********************************************************************
1743  *
1744  *  This routine maps the mbufs to tx descriptors, allowing the
1745  *  TX engine to transmit the packets. 
1746  *      - return 0 on success, positive on failure
1747  *
1748  **********************************************************************/
1749
1750 static int
1751 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1752 {
1753         struct adapter  *adapter = txr->adapter;
1754         u32             olinfo_status = 0, cmd_type_len;
1755         int             i, j, error, nsegs;
1756         int             first;
1757         bool            remap = TRUE;
1758         struct mbuf     *m_head;
1759         bus_dma_segment_t segs[adapter->num_segs];
1760         bus_dmamap_t    map;
1761         struct ixgbe_tx_buf *txbuf;
1762         union ixgbe_adv_tx_desc *txd = NULL;
1763
1764         m_head = *m_headp;
1765
1766         /* Basic descriptor defines */
1767         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1768             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1769
1770         if (m_head->m_flags & M_VLANTAG)
1771                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1772
1773         /*
1774          * Important to capture the first descriptor
1775          * used because it will contain the index of
1776          * the one we tell the hardware to report back
1777          */
1778         first = txr->next_avail_desc;
1779         txbuf = &txr->tx_buffers[first];
1780         map = txbuf->map;
1781
1782         /*
1783          * Map the packet for DMA.
1784          */
1785 retry:
1786         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1787             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1788
1789         if (__predict_false(error)) {
1790                 struct mbuf *m;
1791
1792                 switch (error) {
1793                 case EFBIG:
1794                         /* Try it again? - one try */
1795                         if (remap == TRUE) {
1796                                 remap = FALSE;
1797                                 m = m_defrag(*m_headp, M_NOWAIT);
1798                                 if (m == NULL) {
1799                                         adapter->mbuf_defrag_failed++;
1800                                         m_freem(*m_headp);
1801                                         *m_headp = NULL;
1802                                         return (ENOBUFS);
1803                                 }
1804                                 *m_headp = m;
1805                                 goto retry;
1806                         } else
1807                                 return (error);
1808                 case ENOMEM:
1809                         txr->no_tx_dma_setup++;
1810                         return (error);
1811                 default:
1812                         txr->no_tx_dma_setup++;
1813                         m_freem(*m_headp);
1814                         *m_headp = NULL;
1815                         return (error);
1816                 }
1817         }
1818
1819         /* Make certain there are enough descriptors */
1820         if (nsegs > txr->tx_avail - 2) {
1821                 txr->no_desc_avail++;
1822                 bus_dmamap_unload(txr->txtag, map);
1823                 return (ENOBUFS);
1824         }
1825         m_head = *m_headp;
1826
1827         /*
1828         ** Set up the appropriate offload context;
1829         ** this will consume the first descriptor.
1830         */
1831         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1832         if (__predict_false(error)) {
1833                 if (error == ENOBUFS)
1834                         *m_headp = NULL;
1835                 return (error);
1836         }
1837
1838 #ifdef IXGBE_FDIR
1839         /* Do the flow director magic */
1840         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1841                 ++txr->atr_count;
1842                 if (txr->atr_count >= atr_sample_rate) {
1843                         ixgbe_atr(txr, m_head);
1844                         txr->atr_count = 0;
1845                 }
1846         }
1847 #endif
1848
1849         i = txr->next_avail_desc;
1850         for (j = 0; j < nsegs; j++) {
1851                 bus_size_t seglen;
1852                 bus_addr_t segaddr;
1853
1854                 txbuf = &txr->tx_buffers[i];
1855                 txd = &txr->tx_base[i];
1856                 seglen = segs[j].ds_len;
1857                 segaddr = htole64(segs[j].ds_addr);
1858
1859                 txd->read.buffer_addr = segaddr;
1860                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1861             cmd_type_len | seglen);
1862                 txd->read.olinfo_status = htole32(olinfo_status);
1863
1864                 if (++i == txr->num_desc)
1865                         i = 0;
1866         }
1867
1868         txd->read.cmd_type_len |=
1869             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1870         txr->tx_avail -= nsegs;
1871         txr->next_avail_desc = i;
1872
1873         txbuf->m_head = m_head;
1874         /*
1875         ** Here we swap the map so the last descriptor,
1876         ** which gets the completion interrupt, has the
1877         ** real map, and the first descriptor gets the
1878         ** unused map from this one.
1879         */
1880         txr->tx_buffers[first].map = txbuf->map;
1881         txbuf->map = map;
1882         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1883
1884         /* Set the EOP descriptor that will be marked done */
1885         txbuf = &txr->tx_buffers[first];
1886         txbuf->eop = txd;
1887
1888         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1889             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1890         /*
1891          * Advance the Transmit Descriptor Tail (TDT); this tells the
1892          * hardware that this frame is available to transmit.
1893          */
1894         ++txr->total_packets;
1895         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1896
1897         return (0);
1898
1899 }
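#if 0
/*
** Caller sketch (illustrative only, not driver code): how a transmit
** path is expected to treat ixgbe_xmit() return values.  EFBIG is
** already retried once inside via m_defrag(); an ENOBUFS with the
** mbuf still intact means the ring is out of descriptors, so the
** packet should be requeued and tried again after txeof runs.
*/
static int
example_xmit_one(struct tx_ring *txr, struct mbuf *m)
{
        int error = ixgbe_xmit(txr, &m);

        if (error == ENOBUFS && m != NULL) {
                /* Ring full: the untouched mbuf can be requeued */
                return (error);
        }
        /* On all other failures ixgbe_xmit() consumed the mbuf */
        return (error);
}
#endif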
1900
1901 static void
1902 ixgbe_set_promisc(struct adapter *adapter)
1903 {
1904         u_int32_t       reg_rctl;
1905         struct ifnet   *ifp = adapter->ifp;
1906         int             mcnt = 0;
1907
1908         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1909         reg_rctl &= (~IXGBE_FCTRL_UPE);
1910         if (ifp->if_flags & IFF_ALLMULTI)
1911                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1912         else {
1913                 struct  ifmultiaddr *ifma;
1914 #if __FreeBSD_version < 800000
1915                 IF_ADDR_LOCK(ifp);
1916 #else
1917                 if_maddr_rlock(ifp);
1918 #endif
1919                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1920                         if (ifma->ifma_addr->sa_family != AF_LINK)
1921                                 continue;
1922                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1923                                 break;
1924                         mcnt++;
1925                 }
1926 #if __FreeBSD_version < 800000
1927                 IF_ADDR_UNLOCK(ifp);
1928 #else
1929                 if_maddr_runlock(ifp);
1930 #endif
1931         }
1932         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1933                 reg_rctl &= (~IXGBE_FCTRL_MPE);
1934         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1935
1936         if (ifp->if_flags & IFF_PROMISC) {
1937                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1938                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1939         } else if (ifp->if_flags & IFF_ALLMULTI) {
1940                 reg_rctl |= IXGBE_FCTRL_MPE;
1941                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1942                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1943         }
1944         return;
1945 }
1946
1947
1948 /*********************************************************************
1949  *  Multicast Update
1950  *
1951  *  This routine is called whenever the multicast address list is updated.
1952  *
1953  **********************************************************************/
1954 #define IXGBE_RAR_ENTRIES 16
1955
1956 static void
1957 ixgbe_set_multi(struct adapter *adapter)
1958 {
1959         u32     fctrl;
1960         u8      *mta;
1961         u8      *update_ptr;
1962         struct  ifmultiaddr *ifma;
1963         int     mcnt = 0;
1964         struct ifnet   *ifp = adapter->ifp;
1965
1966         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1967
1968         mta = adapter->mta;
1969         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1970             MAX_NUM_MULTICAST_ADDRESSES);
1971
1972 #if __FreeBSD_version < 800000
1973         IF_ADDR_LOCK(ifp);
1974 #else
1975         if_maddr_rlock(ifp);
1976 #endif
1977         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1978                 if (ifma->ifma_addr->sa_family != AF_LINK)
1979                         continue;
1980                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1981                         break;
1982                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1983                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1984                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1985                 mcnt++;
1986         }
1987 #if __FreeBSD_version < 800000
1988         IF_ADDR_UNLOCK(ifp);
1989 #else
1990         if_maddr_runlock(ifp);
1991 #endif
1992
1993         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1994
1995         if (ifp->if_flags & IFF_PROMISC)
1996                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1997         else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
1998             ifp->if_flags & IFF_ALLMULTI) {
1999                 fctrl |= IXGBE_FCTRL_MPE;
2000                 fctrl &= ~IXGBE_FCTRL_UPE;
2001         } else
2002                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2003         
2004         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2005
2006         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2007                 update_ptr = mta;
2008                 ixgbe_update_mc_addr_list(&adapter->hw,
2009                     update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2010         }
2011
2012         return;
2013 }
2014
2015 /*
2016  * This is an iterator function needed by the multicast
2017  * shared code. It simply feeds the shared code routine the
2018  * addresses from the array built in ixgbe_set_multi(), one by one.
2019  */
2020 static u8 *
2021 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2022 {
2023         u8 *addr = *update_ptr;
2024         u8 *newptr;
2025         *vmdq = 0;
2026
2027         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2028         *update_ptr = newptr;
2029         return addr;
2030 }
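#if 0
/*
** Consumption sketch (illustrative only, not driver code): how the
** shared code walks the flat mta[] array through the iterator above,
** advancing the cursor by IXGBE_ETH_LENGTH_OF_ADDRESS per call.
*/
static void
example_walk_mta(struct ixgbe_hw *hw, u8 *mta, u32 mcnt)
{
        u8 *update_ptr = mta;
        u32 vmdq;

        for (u32 i = 0; i < mcnt; i++) {
                u8 *addr = ixgbe_mc_array_itr(hw, &update_ptr, &vmdq);
                /* addr points at 6-byte entry i, ready to program */
                (void)addr;
        }
}
#endif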
2031
2032
2033 /*********************************************************************
2034  *  Timer routine
2035  *
2036  *  This routine checks for link status, updates statistics,
2037  *  and runs the watchdog check.
2038  *
2039  **********************************************************************/
2040
2041 static void
2042 ixgbe_local_timer(void *arg)
2043 {
2044         struct adapter  *adapter = arg;
2045         device_t        dev = adapter->dev;
2046         struct ix_queue *que = adapter->queues;
2047         struct tx_ring  *txr = adapter->tx_rings;
2048         int             hung = 0, paused = 0;
2049
2050         mtx_assert(&adapter->core_mtx, MA_OWNED);
2051
2052         /* Check for pluggable optics */
2053         if (adapter->sfp_probe)
2054                 if (!ixgbe_sfp_probe(adapter))
2055                         goto out; /* Nothing to do */
2056
2057         ixgbe_update_link_status(adapter);
2058         ixgbe_update_stats_counters(adapter);
2059
2060         /*
2061          * If the interface has been paused
2062          * then don't do the watchdog check
2063          */
2064         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2065                 paused = 1;
2066
2067         /*
2068         ** Check the TX queues status
2069         **      - watchdog only if all queues show hung
2070         */          
2071         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2072                 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2073                     (paused == 0))
2074                         ++hung;
2075                 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2076                         taskqueue_enqueue(que->tq, &txr->txq_task);
2077         }
2078         /* Only truly watchdog if all queues show hung */
2079         if (hung == adapter->num_queues)
2080                 goto watchdog;
2081
2082 out:
2083         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2084         return;
2085
2086 watchdog:
2087         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2088         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2089             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2090             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2091         device_printf(dev,"TX(%d) desc avail = %d, "
2092             "Next TX to Clean = %d\n",
2093             txr->me, txr->tx_avail, txr->next_to_clean);
2094         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2095         adapter->watchdog_events++;
2096         ixgbe_init_locked(adapter);
2097 }
2098
2099 /*
2100 ** Note: this routine updates the OS on the link state;
2101 **      the real check of the hardware only happens with
2102 **      a link interrupt.
2103 */
2104 static void
2105 ixgbe_update_link_status(struct adapter *adapter)
2106 {
2107         struct ifnet    *ifp = adapter->ifp;
2108         device_t dev = adapter->dev;
2109
2110
2111         if (adapter->link_up) {
2112                 if (adapter->link_active == FALSE) {
2113                         if (bootverbose)
2114                                 device_printf(dev,"Link is up %d Gbps %s \n",
2115                                     ((adapter->link_speed == 128)? 10:1),
2116                                     "Full Duplex");
2117                         adapter->link_active = TRUE;
2118                         /* Update any Flow Control changes */
2119                         ixgbe_fc_enable(&adapter->hw);
2120                         if_link_state_change(ifp, LINK_STATE_UP);
2121                 }
2122         } else { /* Link down */
2123                 if (adapter->link_active == TRUE) {
2124                         if (bootverbose)
2125                                 device_printf(dev,"Link is Down\n");
2126                         if_link_state_change(ifp, LINK_STATE_DOWN);
2127                         adapter->link_active = FALSE;
2128                 }
2129         }
2130
2131         return;
2132 }
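/*
** Note on the magic 128 above (illustrative): IXGBE_LINK_SPEED_10GB_FULL
** is the bit value 0x0080 = 128, so the printf reports 10 Gbps for a
** 10G link and falls back to printing 1 Gbps for anything else.
*/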
2133
2134
2135 /*********************************************************************
2136  *
2137  *  This routine disables all traffic on the adapter by issuing a
2138  *  global reset on the MAC and deallocates TX/RX buffers.
2139  *
2140  **********************************************************************/
2141
2142 static void
2143 ixgbe_stop(void *arg)
2144 {
2145         struct ifnet   *ifp;
2146         struct adapter *adapter = arg;
2147         struct ixgbe_hw *hw = &adapter->hw;
2148         ifp = adapter->ifp;
2149
2150         mtx_assert(&adapter->core_mtx, MA_OWNED);
2151
2152         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2153         ixgbe_disable_intr(adapter);
2154         callout_stop(&adapter->timer);
2155
2156         /* Let the stack know...*/
2157         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2158
2159         ixgbe_reset_hw(hw);
2160         hw->adapter_stopped = FALSE;
2161         ixgbe_stop_adapter(hw);
2162         if (hw->mac.type == ixgbe_mac_82599EB)
2163                 ixgbe_stop_mac_link_on_d3_82599(hw);
2164         /* Turn off the laser - noop with no optics */
2165         ixgbe_disable_tx_laser(hw);
2166
2167         /* Update the stack */
2168         adapter->link_up = FALSE;
2169         ixgbe_update_link_status(adapter);
2170
2171         /* reprogram the RAR[0] in case user changed it. */
2172         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2173
2174         return;
2175 }
2176
2177
2178 /*********************************************************************
2179  *
2180  *  Determine hardware revision.
2181  *
2182  **********************************************************************/
2183 static void
2184 ixgbe_identify_hardware(struct adapter *adapter)
2185 {
2186         device_t        dev = adapter->dev;
2187         struct ixgbe_hw *hw = &adapter->hw;
2188
2189         /* Save off the information about this board */
2190         hw->vendor_id = pci_get_vendor(dev);
2191         hw->device_id = pci_get_device(dev);
2192         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2193         hw->subsystem_vendor_id =
2194             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2195         hw->subsystem_device_id =
2196             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2197
2198         /* We need this here to set the num_segs below */
2199         ixgbe_set_mac_type(hw);
2200
2201         /* Pick up the 82599 and VF settings */
2202         if (hw->mac.type != ixgbe_mac_82598EB) {
2203                 hw->phy.smart_speed = ixgbe_smart_speed;
2204                 adapter->num_segs = IXGBE_82599_SCATTER;
2205         } else
2206                 adapter->num_segs = IXGBE_82598_SCATTER;
2207
2208         return;
2209 }
2210
2211 /*********************************************************************
2212  *
2213  *  Determine optic type
2214  *
2215  **********************************************************************/
2216 static void
2217 ixgbe_setup_optics(struct adapter *adapter)
2218 {
2219         struct ixgbe_hw *hw = &adapter->hw;
2220         int             layer;
2221
2222         layer = ixgbe_get_supported_physical_layer(hw);
2223
2224         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2225                 adapter->optics = IFM_10G_T;
2226                 return;
2227         }
2228
2229         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2230                 adapter->optics = IFM_1000_T;
2231                 return;
2232         }
2233
2234         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2235                 adapter->optics = IFM_1000_SX;
2236                 return;
2237         }
2238
2239         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2240             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2241                 adapter->optics = IFM_10G_LR;
2242                 return;
2243         }
2244
2245         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2246                 adapter->optics = IFM_10G_SR;
2247                 return;
2248         }
2249
2250         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2251                 adapter->optics = IFM_10G_TWINAX;
2252                 return;
2253         }
2254
2255         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2256             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2257                 adapter->optics = IFM_10G_CX4;
2258                 return;
2259         }
2260
2261         /* If we get here just set the default */
2262         adapter->optics = IFM_ETHER | IFM_AUTO;
2263         return;
2264 }
2265
2266 /*********************************************************************
2267  *
2268  *  Setup the Legacy or MSI Interrupt handler
2269  *
2270  **********************************************************************/
2271 static int
2272 ixgbe_allocate_legacy(struct adapter *adapter)
2273 {
2274         device_t        dev = adapter->dev;
2275         struct          ix_queue *que = adapter->queues;
2276 #ifndef IXGBE_LEGACY_TX
2277         struct tx_ring          *txr = adapter->tx_rings;
2278 #endif
2279         int             error, rid = 0;
2280
2281         /* MSI RID at 1 */
2282         if (adapter->msix == 1)
2283                 rid = 1;
2284
2285         /* We allocate a single interrupt resource */
2286         adapter->res = bus_alloc_resource_any(dev,
2287             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2288         if (adapter->res == NULL) {
2289                 device_printf(dev, "Unable to allocate bus resource: "
2290                     "interrupt\n");
2291                 return (ENXIO);
2292         }
2293
2294         /*
2295          * Try allocating a fast interrupt and the associated deferred
2296          * processing contexts.
2297          */
2298 #ifndef IXGBE_LEGACY_TX
2299         TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2300 #endif
2301         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2302         que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2303             taskqueue_thread_enqueue, &que->tq);
2304         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
2305             device_get_nameunit(adapter->dev));
2306
2307         /* Tasklets for Link, SFP and Multispeed Fiber */
2308         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2309         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2310         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2311 #ifdef IXGBE_FDIR
2312         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2313 #endif
2314         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2315             taskqueue_thread_enqueue, &adapter->tq);
2316         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2317             device_get_nameunit(adapter->dev));
2318
2319         if ((error = bus_setup_intr(dev, adapter->res,
2320             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
2321             que, &adapter->tag)) != 0) {
2322                 device_printf(dev, "Failed to register fast interrupt "
2323                     "handler: %d\n", error);
2324                 taskqueue_free(que->tq);
2325                 taskqueue_free(adapter->tq);
2326                 que->tq = NULL;
2327                 adapter->tq = NULL;
2328                 return (error);
2329         }
2330         /* For simplicity in the handlers */
2331         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2332
2333         return (0);
2334 }
2335
2336
2337 /*********************************************************************
2338  *
2339  *  Setup MSIX Interrupt resources and handlers 
2340  *
2341  **********************************************************************/
2342 static int
2343 ixgbe_allocate_msix(struct adapter *adapter)
2344 {
2345         device_t        dev = adapter->dev;
2346         struct          ix_queue *que = adapter->queues;
2347         struct          tx_ring *txr = adapter->tx_rings;
2348         int             error, rid, vector = 0;
2349
2350         for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2351                 rid = vector + 1;
2352                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2353                     RF_SHAREABLE | RF_ACTIVE);
2354                 if (que->res == NULL) {
2355                         device_printf(dev,"Unable to allocate"
2356                             " bus resource: que interrupt [%d]\n", vector);
2357                         return (ENXIO);
2358                 }
2359                 /* Set the handler function */
2360                 error = bus_setup_intr(dev, que->res,
2361                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2362                     ixgbe_msix_que, que, &que->tag);
2363                 if (error) {
2364                         que->res = NULL;
2365                         device_printf(dev, "Failed to register QUE handler");
2366                         return (error);
2367                 }
2368 #if __FreeBSD_version >= 800504
2369                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2370 #endif
2371                 que->msix = vector;
2372                 adapter->que_mask |= (u64)1 << que->msix;       /* 64-bit shift */
2373                 /*
2374                 ** Bind the msix vector, and thus the
2375                 ** ring to the corresponding cpu.
2376                 */
2377                 if (adapter->num_queues > 1)
2378                         bus_bind_intr(dev, que->res, i);
2379
2380 #ifndef IXGBE_LEGACY_TX
2381                 TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2382 #endif
2383                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2384                 que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2385                     taskqueue_thread_enqueue, &que->tq);
2386                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2387                     device_get_nameunit(adapter->dev));
2388         }
2389
2390         /* and Link */
2391         rid = vector + 1;
2392         adapter->res = bus_alloc_resource_any(dev,
2393             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2394         if (!adapter->res) {
2395                 device_printf(dev,"Unable to allocate"
2396             " bus resource: Link interrupt [%d]\n", rid);
2397                 return (ENXIO);
2398         }
2399         /* Set the link handler function */
2400         error = bus_setup_intr(dev, adapter->res,
2401             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2402             ixgbe_msix_link, adapter, &adapter->tag);
2403         if (error) {
2404                 adapter->res = NULL;
2405                 device_printf(dev, "Failed to register LINK handler");
2406                 return (error);
2407         }
2408 #if __FreeBSD_version >= 800504
2409         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2410 #endif
2411         adapter->linkvec = vector;
2412         /* Tasklets for Link, SFP and Multispeed Fiber */
2413         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2414         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2415         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2416 #ifdef IXGBE_FDIR
2417         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2418 #endif
2419         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2420             taskqueue_thread_enqueue, &adapter->tq);
2421         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2422             device_get_nameunit(adapter->dev));
2423
2424         return (0);
2425 }
2426
2427 /*
2428  * Setup Either MSI/X or MSI
2429  */
2430 static int
2431 ixgbe_setup_msix(struct adapter *adapter)
2432 {
2433         device_t dev = adapter->dev;
2434         int rid, want, queues, msgs;
2435
2436         /* Override by tuneable */
2437         if (ixgbe_enable_msix == 0)
2438                 goto msi;
2439
2440         /* First try MSI/X */
2441         msgs = pci_msix_count(dev); 
2442         if (msgs == 0)
2443                 goto msi;
2444         rid = PCIR_BAR(MSIX_82598_BAR);
2445         adapter->msix_mem = bus_alloc_resource_any(dev,
2446             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2447         if (adapter->msix_mem == NULL) {
2448                 rid += 4;       /* 82599 maps in higher BAR */
2449                 adapter->msix_mem = bus_alloc_resource_any(dev,
2450                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2451         }
2452         if (adapter->msix_mem == NULL) {
2453                 /* May not be enabled */
2454                 device_printf(adapter->dev,
2455                     "Unable to map MSIX table\n");
2456                 goto msi;
2457         }
2458
2459         /* Figure out a reasonable auto config value */
2460         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2461
2462         if (ixgbe_num_queues != 0)
2463                 queues = ixgbe_num_queues;
2464         /* Set max queues to 8 when autoconfiguring */
2465         else if ((ixgbe_num_queues == 0) && (queues > 8))
2466                 queues = 8;
2467
2468         /* reflect correct sysctl value */
2469         ixgbe_num_queues = queues;
2470
2471         /*
2472         ** Want one vector (RX/TX pair) per queue
2473         ** plus an additional for Link.
2474         */
2475         want = queues + 1;
2476         if (msgs >= want)
2477                 msgs = want;
2478         else {
2479                 device_printf(adapter->dev,
2480                     "MSIX Configuration Problem, "
2481                     "%d vectors but %d queues wanted!\n",
2482                     msgs, want);
2483                 goto msi;
2484         }
2485         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2486                 device_printf(adapter->dev,
2487                     "Using MSIX interrupts with %d vectors\n", msgs);
2488                 adapter->num_queues = queues;
2489                 return (msgs);
2490         }
2491         /*
2492         ** If MSIX alloc failed or provided us with
2493         ** less than needed, free and fall through to MSI
2494         */
2495         pci_release_msi(dev);
2496
2497 msi:
2498         if (adapter->msix_mem != NULL) {
2499                 bus_release_resource(dev, SYS_RES_MEMORY,
2500                     rid, adapter->msix_mem);
2501                 adapter->msix_mem = NULL;
2502         }
2503         msgs = 1;
2504         if (pci_alloc_msi(dev, &msgs) == 0) {
2505                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2506                 return (msgs);
2507         }
2508         device_printf(adapter->dev,"Using a Legacy interrupt\n");
2509         return (0);
2510 }
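/*
** Worked example of the accounting above (illustrative only): on a
** hypothetical 16-CPU machine whose function advertises 10 MSIX
** messages, with the tunables left at their defaults:
**
**      queues = min(mp_ncpus, msgs - 1) = min(16, 9) = 9
**      autoconfig clamp                 -> queues = 8
**      want   = queues + 1 (Link)       = 9
**      msgs(10) >= want(9)              -> pci_alloc_msix() for 9
**
** Should the allocation grant fewer than 9 vectors, it is released
** and the code falls back to a single MSI, then to INTx.
*/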
2511
2512
2513 static int
2514 ixgbe_allocate_pci_resources(struct adapter *adapter)
2515 {
2516         int             rid;
2517         device_t        dev = adapter->dev;
2518
2519         rid = PCIR_BAR(0);
2520         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2521             &rid, RF_ACTIVE);
2522
2523         if (!(adapter->pci_mem)) {
2524                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2525                 return (ENXIO);
2526         }
2527
2528         adapter->osdep.mem_bus_space_tag =
2529                 rman_get_bustag(adapter->pci_mem);
2530         adapter->osdep.mem_bus_space_handle =
2531                 rman_get_bushandle(adapter->pci_mem);
2532         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2533
2534         /* Legacy defaults */
2535         adapter->num_queues = 1;
2536         adapter->hw.back = &adapter->osdep;
2537
2538         /*
2539         ** Now set up MSI or MSI/X; this should
2540         ** return the number of supported
2541         ** vectors. (Will be 1 for MSI)
2542         */
2543         adapter->msix = ixgbe_setup_msix(adapter);
2544         return (0);
2545 }
2546
2547 static void
2548 ixgbe_free_pci_resources(struct adapter * adapter)
2549 {
2550         struct          ix_queue *que = adapter->queues;
2551         device_t        dev = adapter->dev;
2552         int             rid, memrid;
2553
2554         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2555                 memrid = PCIR_BAR(MSIX_82598_BAR);
2556         else
2557                 memrid = PCIR_BAR(MSIX_82599_BAR);
2558
2559         /*
2560         ** There is a slight possibility of a failure mode
2561         ** in attach that will result in entering this function
2562         ** before interrupt resources have been initialized, and
2563         ** in that case we do not want to execute the loops below.
2564         ** We can detect this reliably by the state of the adapter's
2565         ** res pointer.
2566         */
2567         if (adapter->res == NULL)
2568                 goto mem;
2569
2570         /*
2571         **  Release all msix queue resources:
2572         */
2573         for (int i = 0; i < adapter->num_queues; i++, que++) {
2574                 rid = que->msix + 1;
2575                 if (que->tag != NULL) {
2576                         bus_teardown_intr(dev, que->res, que->tag);
2577                         que->tag = NULL;
2578                 }
2579                 if (que->res != NULL)
2580                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2581         }
2582
2583
2584         /* Clean the Legacy or Link interrupt last */
2585         if (adapter->linkvec) /* we are doing MSIX */
2586                 rid = adapter->linkvec + 1;
2587         else
2588                 rid = (adapter->msix != 0) ? 1 : 0;
2589
2590         if (adapter->tag != NULL) {
2591                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2592                 adapter->tag = NULL;
2593         }
2594         if (adapter->res != NULL)
2595                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2596
2597 mem:
2598         if (adapter->msix)
2599                 pci_release_msi(dev);
2600
2601         if (adapter->msix_mem != NULL)
2602                 bus_release_resource(dev, SYS_RES_MEMORY,
2603                     memrid, adapter->msix_mem);
2604
2605         if (adapter->pci_mem != NULL)
2606                 bus_release_resource(dev, SYS_RES_MEMORY,
2607                     PCIR_BAR(0), adapter->pci_mem);
2608
2609         return;
2610 }
2611
2612 /*********************************************************************
2613  *
2614  *  Setup networking device structure and register an interface.
2615  *
2616  **********************************************************************/
2617 static int
2618 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2619 {
2620         struct ixgbe_hw *hw = &adapter->hw;
2621         struct ifnet   *ifp;
2622
2623         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2624
2625         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2626         if (ifp == NULL) {
2627                 device_printf(dev, "can not allocate ifnet structure\n");
2628                 return (-1);
2629         }
2630         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2631 #if __FreeBSD_version < 1000025
2632         ifp->if_baudrate = 1000000000;
2633 #else
2634         if_initbaudrate(ifp, IF_Gbps(10));
2635 #endif
2636         ifp->if_init = ixgbe_init;
2637         ifp->if_softc = adapter;
2638         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2639         ifp->if_ioctl = ixgbe_ioctl;
2640 #ifndef IXGBE_LEGACY_TX
2641         ifp->if_transmit = ixgbe_mq_start;
2642         ifp->if_qflush = ixgbe_qflush;
2643 #else
2644         ifp->if_start = ixgbe_start;
2645         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2646         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2647         IFQ_SET_READY(&ifp->if_snd);
2648 #endif
2649
2650         ether_ifattach(ifp, adapter->hw.mac.addr);
2651
2652         adapter->max_frame_size =
2653             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
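        /* e.g. the default 1500-byte MTU yields 1500 + 14 + 4 = 1518 */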
2654
2655         /*
2656          * Tell the upper layer(s) we support long frames.
2657          */
2658         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2659
2660         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2661         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2662         ifp->if_capabilities |= IFCAP_LRO;
2663         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2664                              |  IFCAP_VLAN_HWTSO
2665                              |  IFCAP_VLAN_MTU;
2666         ifp->if_capenable = ifp->if_capabilities;
2667
2668         /*
2669         ** Don't turn this on by default: if vlans are
2670         ** created on another pseudo device (e.g. lagg),
2671         ** vlan events are not passed through, breaking
2672         ** operation, but with HW FILTER off it works. If
2673         ** using vlans directly on the ixgbe driver you can
2674         ** enable this and get full hardware tag filtering.
2675         */
2676         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2677
2678         /*
2679          * Specify the media types supported by this adapter and register
2680          * callbacks to update media and link information
2681          */
2682         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2683                      ixgbe_media_status);
2684         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2685         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2686         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2687                 ifmedia_add(&adapter->media,
2688                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2689                 ifmedia_add(&adapter->media,
2690                     IFM_ETHER | IFM_1000_T, 0, NULL);
2691         }
2692         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2693         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2694
2695         return (0);
2696 }
2697
2698 static void
2699 ixgbe_config_link(struct adapter *adapter)
2700 {
2701         struct ixgbe_hw *hw = &adapter->hw;
2702         u32     autoneg, err = 0;
2703         bool    sfp, negotiate;
2704
2705         sfp = ixgbe_is_sfp(hw);
2706
2707         if (sfp) { 
2708                 if (hw->phy.multispeed_fiber) {
2709                         hw->mac.ops.setup_sfp(hw);
2710                         ixgbe_enable_tx_laser(hw);
2711                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2712                 } else
2713                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2714         } else {
2715                 if (hw->mac.ops.check_link)
2716                         err = ixgbe_check_link(hw, &adapter->link_speed,
2717                             &adapter->link_up, FALSE);
2718                 if (err)
2719                         goto out;
2720                 autoneg = hw->phy.autoneg_advertised;
2721                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2722                         err  = hw->mac.ops.get_link_capabilities(hw,
2723                             &autoneg, &negotiate);
2724                 if (err)
2725                         goto out;
2726                 if (hw->mac.ops.setup_link)
2727                         err = hw->mac.ops.setup_link(hw,
2728                             autoneg, adapter->link_up);
2729         }
2730 out:
2731         return;
2732 }
2733
2734 /********************************************************************
2735  * Manage DMA'able memory.
2736  *******************************************************************/
2737 static void
2738 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2739 {
2740         if (error)
2741                 return;
2742         *(bus_addr_t *) arg = segs->ds_addr;
2743         return;
2744 }
2745
2746 static int
2747 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2748                 struct ixgbe_dma_alloc *dma, int mapflags)
2749 {
2750         device_t dev = adapter->dev;
2751         int             r;
2752
2753         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2754                                DBA_ALIGN, 0,    /* alignment, bounds */
2755                                BUS_SPACE_MAXADDR,       /* lowaddr */
2756                                BUS_SPACE_MAXADDR,       /* highaddr */
2757                                NULL, NULL,      /* filter, filterarg */
2758                                size,    /* maxsize */
2759                                1,       /* nsegments */
2760                                size,    /* maxsegsize */
2761                                BUS_DMA_ALLOCNOW,        /* flags */
2762                                NULL,    /* lockfunc */
2763                                NULL,    /* lockfuncarg */
2764                                &dma->dma_tag);
2765         if (r != 0) {
2766                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2767                        "error %u\n", r);
2768                 goto fail_0;
2769         }
2770         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2771                              BUS_DMA_NOWAIT, &dma->dma_map);
2772         if (r != 0) {
2773                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2774                        "error %u\n", r);
2775                 goto fail_1;
2776         }
2777         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2778                             size,
2779                             ixgbe_dmamap_cb,
2780                             &dma->dma_paddr,
2781                             mapflags | BUS_DMA_NOWAIT);
2782         if (r != 0) {
2783                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2784                        "error %u\n", r);
2785                 goto fail_2;
2786         }
2787         dma->dma_size = size;
2788         return (0);
2789 fail_2:
2790         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2791 fail_1:
2792         bus_dma_tag_destroy(dma->dma_tag);
2793 fail_0:
2794         dma->dma_map = NULL;
2795         dma->dma_tag = NULL;
2796         return (r);
2797 }
2798
2799 static void
2800 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2801 {
2802         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2803             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2804         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2805         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2806         bus_dma_tag_destroy(dma->dma_tag);
2807 }
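#if 0
/*
** Usage sketch (illustrative only, not driver code): the alloc/free
** pairing for a hypothetical 4KB DMA'able scratch area.  On success
** dma_vaddr holds the kernel virtual address and dma_paddr the bus
** address to hand to the hardware.
*/
static int
example_dma_scratch(struct adapter *adapter)
{
        struct ixgbe_dma_alloc dma;

        if (ixgbe_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) != 0)
                return (ENOMEM);
        bzero(dma.dma_vaddr, 4096);
        /* ... give dma.dma_paddr to the device ... */
        ixgbe_dma_free(adapter, &dma);
        return (0);
}
#endif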
2808
2809
2810 /*********************************************************************
2811  *
2812  *  Allocate memory for the transmit and receive rings, and then
2813  *  the descriptors associated with each, called only once at attach.
2814  *
2815  **********************************************************************/
2816 static int
2817 ixgbe_allocate_queues(struct adapter *adapter)
2818 {
2819         device_t        dev = adapter->dev;
2820         struct ix_queue *que;
2821         struct tx_ring  *txr;
2822         struct rx_ring  *rxr;
2823         int rsize, tsize, error = IXGBE_SUCCESS;
2824         int txconf = 0, rxconf = 0;
2825
2826         /* First allocate the top level queue structs */
2827         if (!(adapter->queues =
2828             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2829             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2830                 device_printf(dev, "Unable to allocate queue memory\n");
2831                 error = ENOMEM;
2832                 goto fail;
2833         }
2834
2835         /* First allocate the TX ring struct memory */
2836         if (!(adapter->tx_rings =
2837             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2838             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2839                 device_printf(dev, "Unable to allocate TX ring memory\n");
2840                 error = ENOMEM;
2841                 goto tx_fail;
2842         }
2843
2844         /* Next allocate the RX */
2845         if (!(adapter->rx_rings =
2846             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2847             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2848                 device_printf(dev, "Unable to allocate RX ring memory\n");
2849                 error = ENOMEM;
2850                 goto rx_fail;
2851         }
2852
2853         /* For the ring itself */
2854         tsize = roundup2(adapter->num_tx_desc *
2855             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2856
2857         /*
2858          * Now set up the TX queues, txconf is needed to handle the
2859          * possibility that things fail midcourse and we need to
2860          * undo memory gracefully
2861          */ 
2862         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2863                 /* Set up some basics */
2864                 txr = &adapter->tx_rings[i];
2865                 txr->adapter = adapter;
2866                 txr->me = i;
2867                 txr->num_desc = adapter->num_tx_desc;
2868
2869                 /* Initialize the TX side lock */
2870                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2871                     device_get_nameunit(dev), txr->me);
2872                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2873
2874                 if (ixgbe_dma_malloc(adapter, tsize,
2875                         &txr->txdma, BUS_DMA_NOWAIT)) {
2876                         device_printf(dev,
2877                             "Unable to allocate TX Descriptor memory\n");
2878                         error = ENOMEM;
2879                         goto err_tx_desc;
2880                 }
2881                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2882                 bzero((void *)txr->tx_base, tsize);
2883
2884                 /* Now allocate transmit buffers for the ring */
2885                 if (ixgbe_allocate_transmit_buffers(txr)) {
2886                         device_printf(dev,
2887                             "Critical Failure setting up transmit buffers\n");
2888                         error = ENOMEM;
2889                         goto err_tx_desc;
2890                 }
2891 #ifndef IXGBE_LEGACY_TX
2892                 /* Allocate a buf ring */
2893                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2894                     M_WAITOK, &txr->tx_mtx);
2895                 if (txr->br == NULL) {
2896                         device_printf(dev,
2897                             "Critical Failure setting up buf ring\n");
2898                         error = ENOMEM;
2899                         goto err_tx_desc;
2900                 }
2901 #endif
2902         }
2903
2904         /*
2905          * Next the RX queues...
2906          */ 
2907         rsize = roundup2(adapter->num_rx_desc *
2908             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2909         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2910                 rxr = &adapter->rx_rings[i];
2911                 /* Set up some basics */
2912                 rxr->adapter = adapter;
2913                 rxr->me = i;
2914                 rxr->num_desc = adapter->num_rx_desc;
2915
2916                 /* Initialize the RX side lock */
2917                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2918                     device_get_nameunit(dev), rxr->me);
2919                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2920
2921                 if (ixgbe_dma_malloc(adapter, rsize,
2922                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2923                         device_printf(dev,
2924                             "Unable to allocate RX Descriptor memory\n");
2925                         error = ENOMEM;
2926                         goto err_rx_desc;
2927                 }
2928                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2929                 bzero((void *)rxr->rx_base, rsize);
2930
2931                 /* Allocate receive buffers for the ring */
2932                 if (ixgbe_allocate_receive_buffers(rxr)) {
2933                         device_printf(dev,
2934                             "Critical Failure setting up receive buffers\n");
2935                         error = ENOMEM;
2936                         goto err_rx_desc;
2937                 }
2938         }
2939
2940         /*
2941         ** Finally set up the queue holding structs
2942         */
2943         for (int i = 0; i < adapter->num_queues; i++) {
2944                 que = &adapter->queues[i];
2945                 que->adapter = adapter;
2946                 que->txr = &adapter->tx_rings[i];
2947                 que->rxr = &adapter->rx_rings[i];
2948         }
2949
2950         return (0);
2951
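        /*
         * Unwind in reverse order of allocation: rxconf and txconf count
         * how many rings were fully set up, so only those rings have
         * descriptor DMA memory to free before the arrays themselves.
         */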
2952 err_rx_desc:
2953         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2954                 ixgbe_dma_free(adapter, &rxr->rxdma);
2955 err_tx_desc:
2956         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2957                 ixgbe_dma_free(adapter, &txr->txdma);
2958         free(adapter->rx_rings, M_DEVBUF);
2959 rx_fail:
2960         free(adapter->tx_rings, M_DEVBUF);
2961 tx_fail:
2962         free(adapter->queues, M_DEVBUF);
2963 fail:
2964         return (error);
2965 }
2966
2967 /*********************************************************************
2968  *
2969  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2970  *  the information needed to transmit a packet on the wire. This is
2971  *  called only once at attach, setup is done every reset.
2972  *
2973  **********************************************************************/
2974 static int
2975 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2976 {
2977         struct adapter *adapter = txr->adapter;
2978         device_t dev = adapter->dev;
2979         struct ixgbe_tx_buf *txbuf;
2980         int error, i;
2981
2982         /*
2983          * Setup DMA descriptor areas.
2984          */
2985         if ((error = bus_dma_tag_create(
2986                                bus_get_dma_tag(adapter->dev),   /* parent */
2987                                1, 0,            /* alignment, bounds */
2988                                BUS_SPACE_MAXADDR,       /* lowaddr */
2989                                BUS_SPACE_MAXADDR,       /* highaddr */
2990                                NULL, NULL,              /* filter, filterarg */
2991                                IXGBE_TSO_SIZE,          /* maxsize */
2992                                adapter->num_segs,       /* nsegments */
2993                                PAGE_SIZE,               /* maxsegsize */
2994                                0,                       /* flags */
2995                                NULL,                    /* lockfunc */
2996                                NULL,                    /* lockfuncarg */
2997                                &txr->txtag))) {
2998                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2999                 goto fail;
3000         }
3001
3002         if (!(txr->tx_buffers =
3003             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3004             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3005                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3006                 error = ENOMEM;
3007                 goto fail;
3008         }
3009
3010         /* Create the descriptor buffer dma maps */
3011         txbuf = txr->tx_buffers;
3012         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3013                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3014                 if (error != 0) {
3015                         device_printf(dev, "Unable to create TX DMA map\n");
3016                         goto fail;
3017                 }
3018         }
3019
3020         return 0;
3021 fail:
3022         /* Free everything; this handles the case where we failed partway */
3023         ixgbe_free_transmit_structures(adapter);
3024         return (error);
3025 }
3026
3027 /*********************************************************************
3028  *
3029  *  Initialize a transmit ring.
3030  *
3031  **********************************************************************/
3032 static void
3033 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3034 {
3035         struct adapter *adapter = txr->adapter;
3036         struct ixgbe_tx_buf *txbuf;
3037         int i;
3038 #ifdef DEV_NETMAP
3039         struct netmap_adapter *na = NA(adapter->ifp);
3040         struct netmap_slot *slot;
3041 #endif /* DEV_NETMAP */
3042
3043         /* Clear the old ring contents */
3044         IXGBE_TX_LOCK(txr);
3045 #ifdef DEV_NETMAP
3046         /*
3047          * (under lock): if in netmap mode, do some consistency
3048          * checks and set slot to entry 0 of the netmap ring.
3049          */
3050         slot = netmap_reset(na, NR_TX, txr->me, 0);
3051 #endif /* DEV_NETMAP */
3052         bzero((void *)txr->tx_base,
3053               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3054         /* Reset indices */
3055         txr->next_avail_desc = 0;
3056         txr->next_to_clean = 0;
3057
3058         /* Free any existing tx buffers. */
3059         txbuf = txr->tx_buffers;
3060         for (i = 0; i < txr->num_desc; i++, txbuf++) {
3061                 if (txbuf->m_head != NULL) {
3062                         bus_dmamap_sync(txr->txtag, txbuf->map,
3063                             BUS_DMASYNC_POSTWRITE);
3064                         bus_dmamap_unload(txr->txtag, txbuf->map);
3065                         m_freem(txbuf->m_head);
3066                         txbuf->m_head = NULL;
3067                 }
3068 #ifdef DEV_NETMAP
3069                 /*
3070                  * In netmap mode, set the map for the packet buffer.
3071                  * NOTE: Some drivers (not this one) also need to set
3072                  * the physical buffer address in the NIC ring.
3073                  * Slots in the netmap ring (indexed by "si") are
3074                  * kring->nkr_hwofs positions "ahead" wrt the
3075                  * corresponding slot in the NIC ring. In some drivers
3076                  * (not here) nkr_hwofs can be negative. Function
3077                  * netmap_idx_n2k() handles wraparounds properly.
3078                  */
3079                 if (slot) {
3080                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3081                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3082                 }
3083 #endif /* DEV_NETMAP */
3084                 /* Clear the EOP descriptor pointer */
3085                 txbuf->eop = NULL;
3086         }
3087
3088 #ifdef IXGBE_FDIR
3089         /* Set the rate at which we sample packets */
3090         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3091                 txr->atr_sample = atr_sample_rate;
3092 #endif
3093
3094         /* Set number of descriptors available */
3095         txr->tx_avail = adapter->num_tx_desc;
3096
3097         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3098             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3099         IXGBE_TX_UNLOCK(txr);
3100 }
3101
3102 /*********************************************************************
3103  *
3104  *  Initialize all transmit rings.
3105  *
3106  **********************************************************************/
3107 static int
3108 ixgbe_setup_transmit_structures(struct adapter *adapter)
3109 {
3110         struct tx_ring *txr = adapter->tx_rings;
3111
3112         for (int i = 0; i < adapter->num_queues; i++, txr++)
3113                 ixgbe_setup_transmit_ring(txr);
3114
3115         return (0);
3116 }
3117
3118 /*********************************************************************
3119  *
3120  *  Enable transmit unit.
3121  *
3122  **********************************************************************/
3123 static void
3124 ixgbe_initialize_transmit_units(struct adapter *adapter)
3125 {
3126         struct tx_ring  *txr = adapter->tx_rings;
3127         struct ixgbe_hw *hw = &adapter->hw;
3128
3129         /* Setup the Base and Length of the Tx Descriptor Ring */
3130
3131         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3132                 u64     tdba = txr->txdma.dma_paddr;
3133                 u32     txctrl;
3134
3135                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3136                        (tdba & 0x00000000ffffffffULL));
3137                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3138                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3139                     adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3140
3141                 /* Setup the HW Tx Head and Tail descriptor pointers */
3142                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3143                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3144
3145                 /* Setup Transmit Descriptor Cmd Settings */
3146                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3147                 txr->queue_status = IXGBE_QUEUE_IDLE;
3148
3149                 /* Set the processing limit */
3150                 txr->process_limit = ixgbe_tx_process_limit;
3151
3152                 /* Disable Head Writeback */
3153                 switch (hw->mac.type) {
3154                 case ixgbe_mac_82598EB:
3155                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3156                         break;
3157                 case ixgbe_mac_82599EB:
3158                 case ixgbe_mac_X540:
3159                 default:
3160                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3161                         break;
3162                 }
3163                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3164                 switch (hw->mac.type) {
3165                 case ixgbe_mac_82598EB:
3166                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3167                         break;
3168                 case ixgbe_mac_82599EB:
3169                 case ixgbe_mac_X540:
3170                 default:
3171                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3172                         break;
3173                 }
3174
3175         }
3176
3177         if (hw->mac.type != ixgbe_mac_82598EB) {
3178                 u32 dmatxctl, rttdcs;
3179                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3180                 dmatxctl |= IXGBE_DMATXCTL_TE;
3181                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3182                 /* Disable arbiter to set MTQC */
3183                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3184                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3185                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3186                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3187                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3188                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3189         }
3190
3191         return;
3192 }
3193
3194 /*********************************************************************
3195  *
3196  *  Free all transmit rings.
3197  *
3198  **********************************************************************/
3199 static void
3200 ixgbe_free_transmit_structures(struct adapter *adapter)
3201 {
3202         struct tx_ring *txr = adapter->tx_rings;
3203
3204         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3205                 IXGBE_TX_LOCK(txr);
3206                 ixgbe_free_transmit_buffers(txr);
3207                 ixgbe_dma_free(adapter, &txr->txdma);
3208                 IXGBE_TX_UNLOCK(txr);
3209                 IXGBE_TX_LOCK_DESTROY(txr);
3210         }
3211         free(adapter->tx_rings, M_DEVBUF);
3212 }
3213
3214 /*********************************************************************
3215  *
3216  *  Free transmit ring related data structures.
3217  *
3218  **********************************************************************/
3219 static void
3220 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3221 {
3222         struct adapter *adapter = txr->adapter;
3223         struct ixgbe_tx_buf *tx_buffer;
3224         int             i;
3225
3226         INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
3227
3228         if (txr->tx_buffers == NULL)
3229                 return;
3230
3231         tx_buffer = txr->tx_buffers;
3232         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3233                 if (tx_buffer->m_head != NULL) {
3234                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3235                             BUS_DMASYNC_POSTWRITE);
3236                         bus_dmamap_unload(txr->txtag,
3237                             tx_buffer->map);
3238                         m_freem(tx_buffer->m_head);
3239                         tx_buffer->m_head = NULL;
3240                         if (tx_buffer->map != NULL) {
3241                                 bus_dmamap_destroy(txr->txtag,
3242                                     tx_buffer->map);
3243                                 tx_buffer->map = NULL;
3244                         }
3245                 } else if (tx_buffer->map != NULL) {
3246                         bus_dmamap_unload(txr->txtag,
3247                             tx_buffer->map);
3248                         bus_dmamap_destroy(txr->txtag,
3249                             tx_buffer->map);
3250                         tx_buffer->map = NULL;
3251                 }
3252         }
3253 #ifndef IXGBE_LEGACY_TX        /* br only exists in the multiqueue path */
3254         if (txr->br != NULL)
3255                 buf_ring_free(txr->br, M_DEVBUF);
3256 #endif
3257         if (txr->tx_buffers != NULL) {
3258                 free(txr->tx_buffers, M_DEVBUF);
3259                 txr->tx_buffers = NULL;
3260         }
3261         if (txr->txtag != NULL) {
3262                 bus_dma_tag_destroy(txr->txtag);
3263                 txr->txtag = NULL;
3264         }
3265         return;
3266 }
3267
3268 /*********************************************************************
3269  *
3270  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3271  *
3272  **********************************************************************/
3273
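/*
 * A context descriptor sits in the same ring as the data descriptors;
 * it carries the header lengths and offload flags that the hardware
 * applies to the packet descriptors following it, which is why setting
 * one up consumes a tx_avail slot at the end of this function.
 */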
3274 static int
3275 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3276     u32 *cmd_type_len, u32 *olinfo_status)
3277 {
3278         struct ixgbe_adv_tx_context_desc *TXD;
3279         struct ether_vlan_header *eh;
3280         struct ip *ip;
3281         struct ip6_hdr *ip6;
3282         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3283         int     ehdrlen, ip_hlen = 0;
3284         u16     etype;
3285         u8      ipproto = 0;
3286         int     offload = TRUE;
3287         int     ctxd = txr->next_avail_desc;
3288         u16     vtag = 0;
3289
3290         /* First check if TSO is to be used */
3291         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3292                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3293
3294         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3295                 offload = FALSE;
3296
3297         /* Indicate the whole packet as payload when not doing TSO */
3298         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3299
3300         /* Now ready a context descriptor */
3301         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3302
3303         /*
3304         ** In advanced descriptors the vlan tag must 
3305         ** be placed into the context descriptor. Hence
3306         ** we need to make one even if not doing offloads.
3307         */
3308         if (mp->m_flags & M_VLANTAG) {
3309                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3310                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3311         } else if (offload == FALSE) /* ... no offload to do */
3312                 return (0);
3313
3314         /*
3315          * Determine where frame payload starts.
3316          * Jump over vlan headers if already present,
3317          * helpful for QinQ too.
3318          */
3319         eh = mtod(mp, struct ether_vlan_header *);
3320         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3321                 etype = ntohs(eh->evl_proto);
3322                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3323         } else {
3324                 etype = ntohs(eh->evl_encap_proto);
3325                 ehdrlen = ETHER_HDR_LEN;
3326         }
3327
3328         /* Set the ether header length */
3329         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3330
3331         switch (etype) {
3332                 case ETHERTYPE_IP:
3333                         ip = (struct ip *)(mp->m_data + ehdrlen);
3334                         ip_hlen = ip->ip_hl << 2;
3335                         ipproto = ip->ip_p;
3336                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3337                         break;
3338                 case ETHERTYPE_IPV6:
3339                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3340                         ip_hlen = sizeof(struct ip6_hdr);
3341                         /* XXX-BZ this will go badly in case of ext hdrs. */
3342                         ipproto = ip6->ip6_nxt;
3343                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3344                         break;
3345                 default:
3346                         offload = FALSE;
3347                         break;
3348         }
3349
3350         vlan_macip_lens |= ip_hlen;
3351         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3352
3353         switch (ipproto) {
3354                 case IPPROTO_TCP:
3355                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3356                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3357                         break;
3358
3359                 case IPPROTO_UDP:
3360                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3361                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3362                         break;
3363
3364 #if __FreeBSD_version >= 800000
3365                 case IPPROTO_SCTP:
3366                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3367                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3368                         break;
3369 #endif
3370                 default:
3371                         offload = FALSE;
3372                         break;
3373         }
3374
3375         if (offload) /* For the TX descriptor setup */
3376                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3377
3378         /* Now copy bits into descriptor */
3379         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3380         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3381         TXD->seqnum_seed = htole32(0);
3382         TXD->mss_l4len_idx = htole32(0);
3383
3384         /* We've consumed the first desc, adjust counters */
3385         if (++ctxd == txr->num_desc)
3386                 ctxd = 0;
3387         txr->next_avail_desc = ctxd;
3388         --txr->tx_avail;
3389
3390         return (0);
3391 }
3392
3393 /**********************************************************************
3394  *
3395  *  Setup work for hardware segmentation offload (TSO) on
3396  *  adapters using advanced tx descriptors
3397  *
3398  **********************************************************************/
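/*
 * The hardware regenerates the IP and TCP checksums for every segment
 * it emits, but it expects the TCP checksum field to be pre-seeded with
 * a pseudo-header checksum computed over a zero length, which is what
 * the in_pseudo()/in6_cksum_pseudo() calls below supply; the IPv4
 * header checksum field is simply cleared.
 */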
3399 static int
3400 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3401     u32 *cmd_type_len, u32 *olinfo_status)
3402 {
3403         struct ixgbe_adv_tx_context_desc *TXD;
3404         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3405         u32 mss_l4len_idx = 0, paylen;
3406         u16 vtag = 0, eh_type;
3407         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3408         struct ether_vlan_header *eh;
3409 #ifdef INET6
3410         struct ip6_hdr *ip6;
3411 #endif
3412 #ifdef INET
3413         struct ip *ip;
3414 #endif
3415         struct tcphdr *th;
3416
3417
3418         /*
3419          * Determine where frame payload starts.
3420          * Jump over vlan headers if already present
3421          */
3422         eh = mtod(mp, struct ether_vlan_header *);
3423         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3424                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3425                 eh_type = eh->evl_proto;
3426         } else {
3427                 ehdrlen = ETHER_HDR_LEN;
3428                 eh_type = eh->evl_encap_proto;
3429         }
3430
3431         switch (ntohs(eh_type)) {
3432 #ifdef INET6
3433         case ETHERTYPE_IPV6:
3434                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3435                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3436                 if (ip6->ip6_nxt != IPPROTO_TCP)
3437                         return (ENXIO);
3438                 ip_hlen = sizeof(struct ip6_hdr);
3440                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3441                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3442                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3443                 break;
3444 #endif
3445 #ifdef INET
3446         case ETHERTYPE_IP:
3447                 ip = (struct ip *)(mp->m_data + ehdrlen);
3448                 if (ip->ip_p != IPPROTO_TCP)
3449                         return (ENXIO);
3450                 ip->ip_sum = 0;
3451                 ip_hlen = ip->ip_hl << 2;
3452                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3453                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3454                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3455                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3456                 /* Tell transmit desc to also do IPv4 checksum. */
3457                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3458                 break;
3459 #endif
3460         default:
3461                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3462                     __func__, ntohs(eh_type));
3463                 break;
3464         }
3465
3466         ctxd = txr->next_avail_desc;
3467         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3468
3469         tcp_hlen = th->th_off << 2;
3470
3471         /* This is used in the transmit desc in encap */
3472         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3473
3474         /* VLAN MACLEN IPLEN */
3475         if (mp->m_flags & M_VLANTAG) {
3476                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3477                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3478         }
3479
3480         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3481         vlan_macip_lens |= ip_hlen;
3482         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3483
3484         /* ADV DTYPE TUCMD */
3485         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3486         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3487         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3488
3489         /* MSS L4LEN IDX */
3490         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3491         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3492         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3493
3494         TXD->seqnum_seed = htole32(0);
3495
3496         if (++ctxd == txr->num_desc)
3497                 ctxd = 0;
3498
3499         txr->tx_avail--;
3500         txr->next_avail_desc = ctxd;
3501         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3502         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3503         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3504         ++txr->tso_tx;
3505         return (0);
3506 }
3507
3508 #ifdef IXGBE_FDIR
3509 /*
3510 ** This routine parses packet headers so that Flow
3511 ** Director can make a hashed filter table entry 
3512 ** allowing traffic flows to be identified and kept
3513 ** on the same CPU.  Parsing every packet would be a
3514 ** performance hit, so only one in every IXGBE_FDIR_RATE
3515 ** packets is sampled.
3516 */
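/*
** The port fields below are hashed swapped ("src and dst are
** inverted") so the resulting signature filter matches the return
** traffic of this flow: an incoming packet carries its source and
** destination reversed relative to the outgoing packet sampled here.
*/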
3517 static void
3518 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3519 {
3520         struct adapter                  *adapter = txr->adapter;
3521         struct ix_queue                 *que;
3522         struct ip                       *ip;
3523         struct tcphdr                   *th;
3524         struct udphdr                   *uh;
3525         struct ether_vlan_header        *eh;
3526         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3527         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3528         int                             ehdrlen, ip_hlen;
3529         u16                             etype;
3530
3531         eh = mtod(mp, struct ether_vlan_header *);
3532         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3533                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3534                 etype = eh->evl_proto;
3535         } else {
3536                 ehdrlen = ETHER_HDR_LEN;
3537                 etype = eh->evl_encap_proto;
3538         }
3539
3540         /* Only handling IPv4 */
3541         if (etype != htons(ETHERTYPE_IP))
3542                 return;
3543
3544         ip = (struct ip *)(mp->m_data + ehdrlen);
3545         ip_hlen = ip->ip_hl << 2;
3546
3547         /* check if we're UDP or TCP */
3548         switch (ip->ip_p) {
3549         case IPPROTO_TCP:
3550                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3551                 /* src and dst are inverted */
3552                 common.port.dst ^= th->th_sport;
3553                 common.port.src ^= th->th_dport;
3554                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3555                 break;
3556         case IPPROTO_UDP:
3557                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3558                 /* src and dst are inverted */
3559                 common.port.dst ^= uh->uh_sport;
3560                 common.port.src ^= uh->uh_dport;
3561                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3562                 break;
3563         default:
3564                 return;
3565         }
3566
3567         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3568         if (mp->m_pkthdr.ether_vtag)
3569                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3570         else
3571                 common.flex_bytes ^= etype;
3572         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3573
3574         que = &adapter->queues[txr->me];
3575         /*
3576         ** This assumes the Rx queue and Tx
3577         ** queue are bound to the same CPU
3578         */
3579         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3580             input, common, que->msix);
3581 }
3582 #endif /* IXGBE_FDIR */
3583
3584 /**********************************************************************
3585  *
3586  *  Examine each tx_buffer in the used queue. If the hardware is done
3587  *  processing the packet then free associated resources. The
3588  *  tx_buffer is put back on the free queue.
3589  *
3590  **********************************************************************/
3591 static void
3592 ixgbe_txeof(struct tx_ring *txr)
3593 {
3594         struct adapter          *adapter = txr->adapter;
3595         struct ifnet            *ifp = adapter->ifp;
3596         u32                     work, processed = 0;
3597         u16                     limit = txr->process_limit;
3598         struct ixgbe_tx_buf     *buf;
3599         union ixgbe_adv_tx_desc *txd;
3600
3601         mtx_assert(&txr->tx_mtx, MA_OWNED);
3602
3603 #ifdef DEV_NETMAP
3604         if (ifp->if_capenable & IFCAP_NETMAP) {
3605                 struct netmap_adapter *na = NA(ifp);
3606                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3607                 txd = txr->tx_base;
3608                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3609                     BUS_DMASYNC_POSTREAD);
3610                 /*
3611                  * In netmap mode, all the work is done in the context
3612                  * of the client thread. Interrupt handlers only wake up
3613                  * clients, which may be sleeping on individual rings
3614                  * or on a global resource for all rings.
3615                  * To implement tx interrupt mitigation, we wake up the client
3616                  * thread roughly every half ring, even if the NIC interrupts
3617                  * more frequently. This is implemented as follows:
3618                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3619                  *   the slot that should wake up the thread (nkr_num_slots
3620                  *   means the user thread should not be woken up);
3621                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3622                  *   or the slot has the DD bit set.
3623                  */
3624                 if (!netmap_mitigate ||
3625                     (kring->nr_kflags < kring->nkr_num_slots &&
3626                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3627                         netmap_tx_irq(ifp, txr->me);
3628                 }
3629                 return;
3630         }
3631 #endif /* DEV_NETMAP */
3632
3633         if (txr->tx_avail == txr->num_desc) {
3634                 txr->queue_status = IXGBE_QUEUE_IDLE;
3635                 return;
3636         }
3637
3638         /* Get work starting point */
3639         work = txr->next_to_clean;
3640         buf = &txr->tx_buffers[work];
3641         txd = &txr->tx_base[work];
3642         work -= txr->num_desc; /* The distance to ring end */
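        /*
         * 'work' now holds next_to_clean minus num_desc, relying on
         * unsigned wraparound: it counts up toward zero, so the '!work'
         * tests below detect the end of the ring without a comparison
         * against num_desc on every step.
         */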
3643         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3644             BUS_DMASYNC_POSTREAD);
3645
3646         do {
3647                 union ixgbe_adv_tx_desc *eop = buf->eop;
3648                 if (eop == NULL) /* No work */
3649                         break;
3650
3651                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3652                         break;  /* I/O not complete */
3653
3654                 if (buf->m_head) {
3655                         txr->bytes +=
3656                             buf->m_head->m_pkthdr.len;
3657                         bus_dmamap_sync(txr->txtag,
3658                             buf->map,
3659                             BUS_DMASYNC_POSTWRITE);
3660                         bus_dmamap_unload(txr->txtag,
3661                             buf->map);
3662                         m_freem(buf->m_head);
3663                         buf->m_head = NULL;
3664                         buf->map = NULL;
3665                 }
3666                 buf->eop = NULL;
3667                 ++txr->tx_avail;
3668
3669                 /* We clean the range if multi segment */
3670                 while (txd != eop) {
3671                         ++txd;
3672                         ++buf;
3673                         ++work;
3674                         /* wrap the ring? */
3675                         if (__predict_false(!work)) {
3676                                 work -= txr->num_desc;
3677                                 buf = txr->tx_buffers;
3678                                 txd = txr->tx_base;
3679                         }
3680                         if (buf->m_head) {
3681                                 txr->bytes +=
3682                                     buf->m_head->m_pkthdr.len;
3683                                 bus_dmamap_sync(txr->txtag,
3684                                     buf->map,
3685                                     BUS_DMASYNC_POSTWRITE);
3686                                 bus_dmamap_unload(txr->txtag,
3687                                     buf->map);
3688                                 m_freem(buf->m_head);
3689                                 buf->m_head = NULL;
3690                                 buf->map = NULL;
3691                         }
3692                         ++txr->tx_avail;
3693                         buf->eop = NULL;
3694
3695                 }
3696                 ++txr->packets;
3697                 ++processed;
3698                 ++ifp->if_opackets;
3699                 txr->watchdog_time = ticks;
3700
3701                 /* Try the next packet */
3702                 ++txd;
3703                 ++buf;
3704                 ++work;
3705                 /* reset with a wrap */
3706                 if (__predict_false(!work)) {
3707                         work -= txr->num_desc;
3708                         buf = txr->tx_buffers;
3709                         txd = txr->tx_base;
3710                 }
3711                 prefetch(txd);
3712         } while (__predict_true(--limit));
3713
3714         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3715             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3716
3717         work += txr->num_desc;
3718         txr->next_to_clean = work;
3719
3720         /*
3721         ** Watchdog calculation: we know there's work
3722         ** outstanding, or the first return above would
3723         ** have been taken; so nothing processed for too
3724         ** long indicates a hang.
3725         */
3726         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3727                 txr->queue_status = IXGBE_QUEUE_HUNG;
3728
3729         if (txr->tx_avail == txr->num_desc)
3730                 txr->queue_status = IXGBE_QUEUE_IDLE;
3731
3732         return;
3733 }
3734
3735 /*********************************************************************
3736  *
3737  *  Refresh mbuf buffers for RX descriptor rings
3738  *   - now keeps its own state, so discards due to resource
3739  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3740  *     it just returns, keeping its placeholder, so it can simply
3741  *     be recalled later to try again.
3742  *
3743  **********************************************************************/
3744 static void
3745 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3746 {
3747         struct adapter          *adapter = rxr->adapter;
3748         bus_dma_segment_t       seg[1];
3749         struct ixgbe_rx_buf     *rxbuf;
3750         struct mbuf             *mp;
3751         int                     i, j, nsegs, error;
3752         bool                    refreshed = FALSE;
3753
3754         i = j = rxr->next_to_refresh;
3755         /* Control the loop with one beyond */
3756         if (++j == rxr->num_desc)
3757                 j = 0;
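        /*
         * 'j' runs one slot ahead of 'i', so the loop stops one short of
         * 'limit'; next_to_refresh always trails the lookahead index and
         * is the value written to the RDT tail register at update:.
         */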
3758
3759         while (j != limit) {
3760                 rxbuf = &rxr->rx_buffers[i];
3761                 if (rxbuf->buf == NULL) {
3762                         mp = m_getjcl(M_NOWAIT, MT_DATA,
3763                             M_PKTHDR, rxr->mbuf_sz);
3764                         if (mp == NULL)
3765                                 goto update;
3766                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3767                                 m_adj(mp, ETHER_ALIGN);
3768                 } else
3769                         mp = rxbuf->buf;
3770
3771                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
3772
3773                 /* If we're dealing with an mbuf that was copied rather
3774                  * than replaced, there's no need to go through busdma.
3775                  */
3776                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
3777                         /* Get the memory mapping */
3778                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3779                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
3780                         if (error != 0) {
3781                                 printf("Refresh mbufs: payload dmamap load"
3782                                     " failure - %d\n", error);
3783                                 m_free(mp);
3784                                 rxbuf->buf = NULL;
3785                                 goto update;
3786                         }
3787                         rxbuf->buf = mp;
3788                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3789                             BUS_DMASYNC_PREREAD);
3790                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
3791                             htole64(seg[0].ds_addr);
3792                 } else {
3793                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
3794                         rxbuf->flags &= ~IXGBE_RX_COPY;
3795                 }
3796
3797                 refreshed = TRUE;
3798                 /* Next is precalculated */
3799                 i = j;
3800                 rxr->next_to_refresh = i;
3801                 if (++j == rxr->num_desc)
3802                         j = 0;
3803         }
3804 update:
3805         if (refreshed) /* Update hardware tail index */
3806                 IXGBE_WRITE_REG(&adapter->hw,
3807                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3808         return;
3809 }
3810
3811 /*********************************************************************
3812  *
3813  *  Allocate memory for rx_buffer structures. Since we use one
3814  *  rx_buffer per received packet, the maximum number of rx_buffer's
3815  *  that we'll need is equal to the number of receive descriptors
3816  *  that we've allocated.
3817  *
3818  **********************************************************************/
3819 static int
3820 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3821 {
3822         struct  adapter         *adapter = rxr->adapter;
3823         device_t                dev = adapter->dev;
3824         struct ixgbe_rx_buf     *rxbuf;
3825         int                     i, bsize, error;
3826
3827         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
3828         if (!(rxr->rx_buffers =
3829             (struct ixgbe_rx_buf *) malloc(bsize,
3830             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3831                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3832                 error = ENOMEM;
3833                 goto fail;
3834         }
3835
3836         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
3837                                    1, 0,        /* alignment, bounds */
3838                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3839                                    BUS_SPACE_MAXADDR,   /* highaddr */
3840                                    NULL, NULL,          /* filter, filterarg */
3841                                    MJUM16BYTES,         /* maxsize */
3842                                    1,                   /* nsegments */
3843                                    MJUM16BYTES,         /* maxsegsize */
3844                                    0,                   /* flags */
3845                                    NULL,                /* lockfunc */
3846                                    NULL,                /* lockfuncarg */
3847                                    &rxr->ptag))) {
3848                 device_printf(dev, "Unable to create RX DMA tag\n");
3849                 goto fail;
3850         }
3851
3852         for (i = 0; i < rxr->num_desc; i++) {
3853                 rxbuf = &rxr->rx_buffers[i];
3854                 error = bus_dmamap_create(rxr->ptag,
3855                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3856                 if (error) {
3857                         device_printf(dev, "Unable to create RX dma map\n");
3858                         goto fail;
3859                 }
3860         }
3861
3862         return (0);
3863
3864 fail:
3865         /* Frees all, but can handle partial completion */
3866         ixgbe_free_receive_structures(adapter);
3867         return (error);
3868 }
3869
3870 /*
3871 ** Used to detect a descriptor that has
3872 ** been merged by Hardware RSC.
3873 */
3874 static inline u32
3875 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3876 {
3877         return (le32toh(rx->wb.lower.lo_dword.data) &
3878             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3879 }
3880
3881 /*********************************************************************
3882  *
3883  *  Initialize Hardware RSC (LRO) feature on 82599
3884  *  for an RX ring, this is toggled by the LRO capability
3885  *  even though it is transparent to the stack.
3886  *
3887  *  NOTE: since this HW feature only works with IPv4, and
3888  *        our testing has shown soft LRO to be as effective,
3889  *        it is disabled by default.
3890  *
3891  **********************************************************************/
3892 static void
3893 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3894 {
3895         struct  adapter         *adapter = rxr->adapter;
3896         struct  ixgbe_hw        *hw = &adapter->hw;
3897         u32                     rscctrl, rdrxctl;
3898
3899         /* If turning LRO/RSC off we need to disable it */
3900         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
3901                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3902                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write back the cleared enable bit */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3903                 return;
3904         }
3905
3906         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3907         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3908 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3909         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3910 #endif /* DEV_NETMAP */
3911         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3912         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3913         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3914
3915         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3916         rscctrl |= IXGBE_RSCCTL_RSCEN;
3917         /*
3918         ** Limit the total number of descriptors that
3919         ** can be combined, so it does not exceed 64K
3920         */
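        /*
        ** For example: 16 x 2KB clusters = 32KB, 8 x 4KB (page-size
        ** clusters on 4KB-page systems) = 32KB, and 4 x 9KB = 36KB;
        ** each case stays safely below the 64KB limit.
        */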
3921         if (rxr->mbuf_sz == MCLBYTES)
3922                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3923         else if (rxr->mbuf_sz == MJUMPAGESIZE)
3924                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3925         else if (rxr->mbuf_sz == MJUM9BYTES)
3926                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3927         else  /* Using 16K cluster */
3928                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3929
3930         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3931
3932         /* Enable TCP header recognition */
3933         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3934             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3935             IXGBE_PSRTYPE_TCPHDR));
3936
3937         /* Disable RSC for ACK packets */
3938         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3939             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3940
3941         rxr->hw_rsc = TRUE;
3942 }
3943
3944
3945 static void
3946 ixgbe_free_receive_ring(struct rx_ring *rxr)
3947 {
3948         struct ixgbe_rx_buf       *rxbuf;
3949         int i;
3950
3951         for (i = 0; i < rxr->num_desc; i++) {
3952                 rxbuf = &rxr->rx_buffers[i];
3953                 if (rxbuf->buf != NULL) {
3954                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3955                             BUS_DMASYNC_POSTREAD);
3956                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3957                         rxbuf->buf->m_flags |= M_PKTHDR;
3958                         m_freem(rxbuf->buf);
3959                         rxbuf->buf = NULL;
3960                         rxbuf->flags = 0;
3961                 }
3962         }
3963 }
3964
3965
3966 /*********************************************************************
3967  *
3968  *  Initialize a receive ring and its buffers.
3969  *
3970  **********************************************************************/
3971 static int
3972 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3973 {
3974         struct  adapter         *adapter;
3975         struct ifnet            *ifp;
3976         device_t                dev;
3977         struct ixgbe_rx_buf     *rxbuf;
3978         bus_dma_segment_t       seg[1];
3979         struct lro_ctrl         *lro = &rxr->lro;
3980         int                     rsize, nsegs, error = 0;
3981 #ifdef DEV_NETMAP
3982         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3983         struct netmap_slot *slot;
3984 #endif /* DEV_NETMAP */
3985
3986         adapter = rxr->adapter;
3987         ifp = adapter->ifp;
3988         dev = adapter->dev;
3989
3990         /* Clear the ring contents */
3991         IXGBE_RX_LOCK(rxr);
3992 #ifdef DEV_NETMAP
3993         /* same as in ixgbe_setup_transmit_ring() */
3994         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3995 #endif /* DEV_NETMAP */
3996         rsize = roundup2(adapter->num_rx_desc *
3997             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3998         bzero((void *)rxr->rx_base, rsize);
3999         /* Cache the size */
4000         rxr->mbuf_sz = adapter->rx_mbuf_sz;
4001
4002         /* Free current RX buffer structs and their mbufs */
4003         ixgbe_free_receive_ring(rxr);
4004
4005         /* Now replenish the mbufs */
4006         for (int j = 0; j != rxr->num_desc; ++j) {
4007                 struct mbuf     *mp;
4008
4009                 rxbuf = &rxr->rx_buffers[j];
4010 #ifdef DEV_NETMAP
4011                 /*
4012                  * In netmap mode, fill the map and set the buffer
4013                  * address in the NIC ring, considering the offset
4014                  * between the netmap and NIC rings (see comment in
4015                  * ixgbe_setup_transmit_ring() ). No need to allocate
4016                  * an mbuf, so end the block with a continue;
4017                  */
4018                 if (slot) {
4019                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4020                         uint64_t paddr;
4021                         void *addr;
4022
4023                         addr = PNMB(slot + sj, &paddr);
4024                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4025                         /* Update descriptor and the cached value */
4026                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4027                         rxbuf->addr = htole64(paddr);
4028                         continue;
4029                 }
4030 #endif /* DEV_NETMAP */
4031                 rxbuf->flags = 0; 
4032                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
4033                     M_PKTHDR, adapter->rx_mbuf_sz);
4034                 if (rxbuf->buf == NULL) {
4035                         error = ENOBUFS;
4036                         goto fail;
4037                 }
4038                 mp = rxbuf->buf;
4039                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4040                 /* Get the memory mapping */
4041                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4042                     rxbuf->pmap, mp, seg,
4043                     &nsegs, BUS_DMA_NOWAIT);
4044                 if (error != 0)
4045                         goto fail;
4046                 bus_dmamap_sync(rxr->ptag,
4047                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4048                 /* Update the descriptor and the cached value */
4049                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
4050                 rxbuf->addr = htole64(seg[0].ds_addr);
4051         }
4052
4053
4054         /* Setup our descriptor indices */
4055         rxr->next_to_check = 0;
4056         rxr->next_to_refresh = 0;
4057         rxr->lro_enabled = FALSE;
4058         rxr->rx_copies = 0;
4059         rxr->rx_bytes = 0;
4060         rxr->discard = FALSE;
4061         rxr->vtag_strip = FALSE;
4062
4063         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4064             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4065
4066         /*
4067         ** Now set up the LRO interface:
4068         */
4069         if (ixgbe_rsc_enable)
4070                 ixgbe_setup_hw_rsc(rxr);
4071         else if (ifp->if_capenable & IFCAP_LRO) {
4072                 int err = tcp_lro_init(lro);
4073                 if (err) {
4074                         device_printf(dev, "LRO Initialization failed!\n");
4075                         goto fail;
4076                 }
4077                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4078                 rxr->lro_enabled = TRUE;
4079                 lro->ifp = adapter->ifp;
4080         }
4081
4082         IXGBE_RX_UNLOCK(rxr);
4083         return (0);
4084
4085 fail:
4086         ixgbe_free_receive_ring(rxr);
4087         IXGBE_RX_UNLOCK(rxr);
4088         return (error);
4089 }
4090
4091 /*********************************************************************
4092  *
4093  *  Initialize all receive rings.
4094  *
4095  **********************************************************************/
4096 static int
4097 ixgbe_setup_receive_structures(struct adapter *adapter)
4098 {
4099         struct rx_ring *rxr = adapter->rx_rings;
4100         int j;
4101
4102         for (j = 0; j < adapter->num_queues; j++, rxr++)
4103                 if (ixgbe_setup_receive_ring(rxr))
4104                         goto fail;
4105
4106         return (0);
4107 fail:
4108         /*
4109          * Free RX buffers allocated so far; we only handle the
4110          * rings that completed, since the failing case will have
4111          * cleaned up after itself. Ring 'j' failed, so it is the terminus.
4112          */
4113         for (int i = 0; i < j; ++i) {
4114                 rxr = &adapter->rx_rings[i];
4115                 ixgbe_free_receive_ring(rxr);
4116         }
4117
4118         return (ENOBUFS);
4119 }
4120
4121 /*********************************************************************
4122  *
4123  *  Setup receive registers and features.
4124  *
4125  **********************************************************************/
4126 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4127
4128 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4129         
4130 static void
4131 ixgbe_initialize_receive_units(struct adapter *adapter)
4132 {
4133         struct  rx_ring *rxr = adapter->rx_rings;
4134         struct ixgbe_hw *hw = &adapter->hw;
4135         struct ifnet   *ifp = adapter->ifp;
4136         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4137         u32             reta, mrqc = 0, hlreg, random[10];
4138
4139
4140         /*
4141          * Make sure receives are disabled while
4142          * setting up the descriptor ring
4143          */
4144         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4145         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4146             rxctrl & ~IXGBE_RXCTRL_RXEN);
4147
4148         /* Enable broadcasts */
4149         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4150         fctrl |= IXGBE_FCTRL_BAM;
4151         fctrl |= IXGBE_FCTRL_DPF;
4152         fctrl |= IXGBE_FCTRL_PMCF;
4153         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4154
4155         /* Set for Jumbo Frames? */
4156         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4157         if (ifp->if_mtu > ETHERMTU)
4158                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4159         else
4160                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4161 #ifdef DEV_NETMAP
4162         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4163         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4164                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4165         else
4166                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4167 #endif /* DEV_NETMAP */
4168         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4169
4170         bufsz = (adapter->rx_mbuf_sz +
4171             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
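        /*
         * SRRCTL.BSIZEPKT is expressed in 1KB units; since every
         * supported cluster size (2KB/4KB/9KB/16KB) is an exact 1KB
         * multiple, the rounded shift reproduces the buffer size
         * precisely for the hardware.
         */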
4172
4173         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4174                 u64 rdba = rxr->rxdma.dma_paddr;
4175
4176                 /* Setup the Base and Length of the Rx Descriptor Ring */
4177                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4178                                (rdba & 0x00000000ffffffffULL));
4179                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4180                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4181                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4182
4183                 /* Set up the SRRCTL register */
4184                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4185                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4186                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4187                 srrctl |= bufsz;
4188                 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4189                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4190
4191                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4192                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4193                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4194
4195                 /* Set the processing limit */
4196                 rxr->process_limit = ixgbe_rx_process_limit;
4197         }
4198
4199         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4200                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4201                               IXGBE_PSRTYPE_UDPHDR |
4202                               IXGBE_PSRTYPE_IPV4HDR |
4203                               IXGBE_PSRTYPE_IPV6HDR;
4204                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4205         }
4206
4207         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4208
4209         /* Setup RSS */
4210         if (adapter->num_queues > 1) {
4211                 int i, j;
4212                 reta = 0;
4213
4214                 /* set up random bits */
4215                 arc4rand(&random, sizeof(random), 0);
4216
4217                 /* Set up the redirection table */
4218                 for (i = 0, j = 0; i < 128; i++, j++) {
4219                         if (j == adapter->num_queues) j = 0;
4220                         reta = (reta << 8) | (j * 0x11);
4221                         if ((i & 3) == 3)
4222                                 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4223                 }
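                /*
                 * Worked example of the packing above (illustrative
                 * numbers): with num_queues = 4, j cycles 0,1,2,3 and
                 * each byte written is j * 0x11, so after i = 3 the
                 * accumulated value 0x00112233 is written to RETA(0).
                 * Each 32-bit RETA register thus holds four one-byte
                 * entries; 32 registers cover all 128 slots.
                 */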
4224
4225                 /* Now fill our hash function seeds */
4226                 for (int i = 0; i < 10; i++)
4227                         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4228
4229                 /* Perform hash on these packet types */
4230                 mrqc = IXGBE_MRQC_RSSEN
4231                      | IXGBE_MRQC_RSS_FIELD_IPV4
4232                      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4233                      | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4234                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4235                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4236                      | IXGBE_MRQC_RSS_FIELD_IPV6
4237                      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4238                      | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4239                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4240                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4241
4242                 /* RSS and RX IPP Checksum are mutually exclusive */
4243                 rxcsum |= IXGBE_RXCSUM_PCSD;
4244         }
4245
4246         if (ifp->if_capenable & IFCAP_RXCSUM)
4247                 rxcsum |= IXGBE_RXCSUM_PCSD;
4248
4249         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4250                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4251
4252         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4253
4254         return;
4255 }
4256
4257 /*********************************************************************
4258  *
4259  *  Free all receive rings.
4260  *
4261  **********************************************************************/
4262 static void
4263 ixgbe_free_receive_structures(struct adapter *adapter)
4264 {
4265         struct rx_ring *rxr = adapter->rx_rings;
4266
4267         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4268
4269         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4270                 struct lro_ctrl         *lro = &rxr->lro;
4271                 ixgbe_free_receive_buffers(rxr);
4272                 /* Free LRO memory */
4273                 tcp_lro_free(lro);
4274                 /* Free the ring memory as well */
4275                 ixgbe_dma_free(adapter, &rxr->rxdma);
4276         }
4277
4278         free(adapter->rx_rings, M_DEVBUF);
4279 }
4280
4281
4282 /*********************************************************************
4283  *
4284  *  Free receive ring data structures
4285  *
4286  **********************************************************************/
4287 static void
4288 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4289 {
4290         struct adapter          *adapter = rxr->adapter;
4291         struct ixgbe_rx_buf     *rxbuf;
4292
4293         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4294
4295         /* Cleanup any existing buffers */
4296         if (rxr->rx_buffers != NULL) {
4297                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4298                         rxbuf = &rxr->rx_buffers[i];
4299                         if (rxbuf->buf != NULL) {
4300                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4301                                     BUS_DMASYNC_POSTREAD);
4302                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4303                                 rxbuf->buf->m_flags |= M_PKTHDR;
4304                                 m_freem(rxbuf->buf);
4305                         }
4306                         rxbuf->buf = NULL;
4307                         if (rxbuf->pmap != NULL) {
4308                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4309                                 rxbuf->pmap = NULL;
4310                         }
4311                 }
4312                 free(rxr->rx_buffers, M_DEVBUF);
4313                 rxr->rx_buffers = NULL;
4316         }
4317
4318         if (rxr->ptag != NULL) {
4319                 bus_dma_tag_destroy(rxr->ptag);
4320                 rxr->ptag = NULL;
4321         }
4322
4323         return;
4324 }
4325
4326 static __inline void
4327 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4328 {
4329                  
4330         /*
4331          * At the moment LRO is only for IP/TCP packets whose TCP checksum
4332          * has been computed by hardware, and which carry no VLAN tag in the
4333          * Ethernet header.  For IPv6 we do not yet support extension headers.
4334          */
4335         if (rxr->lro_enabled &&
4336             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4337             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4338             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4339             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4340             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4341             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4342             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4343             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4344                 /*
4345                  * Send to the stack if:
4346                  *  - LRO not enabled, or
4347                  *  - no LRO resources, or
4348                  *  - lro enqueue fails
4349                  */
4350                 if (rxr->lro.lro_cnt != 0)
4351                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4352                                 return;
4353         }
4354         IXGBE_RX_UNLOCK(rxr);
4355         (*ifp->if_input)(ifp, m);
4356         IXGBE_RX_LOCK(rxr);
4357 }
4358
4359 static __inline void
4360 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4361 {
4362         struct ixgbe_rx_buf     *rbuf;
4363
4364         rbuf = &rxr->rx_buffers[i];
4365
4366         if (rbuf->fmp != NULL) {/* Partial chain ? */
4367                 rbuf->fmp->m_flags |= M_PKTHDR;
4368                 m_freem(rbuf->fmp);
4369                 rbuf->fmp = NULL;
4370         }
4371
4372         /*
4373         ** With advanced descriptors the writeback
4374         ** clobbers the buffer addresses, so it's easier
4375         ** to just free the existing mbufs and take
4376         ** the normal refresh path to get new buffers
4377         ** and mappings.
4378         */
4379         if (rbuf->buf) {
4380                 m_free(rbuf->buf);
4381                 rbuf->buf = NULL;
4382         }
4383
4384         rbuf->flags = 0;
4385  
4386         return;
4387 }
4388
4389
4390 /*********************************************************************
4391  *
4392  *  This routine executes in interrupt context. It replenishes
4393  *  the mbufs in the descriptor ring and sends data which has been
4394  *  DMA'ed into host memory to the upper layer.
4395  *
4396  *  We loop at most count times if count is > 0, or until done if
4397  *  count < 0.
4398  *
4399  *  Return TRUE for more work, FALSE for all clean.
4400  *********************************************************************/
4401 static bool
4402 ixgbe_rxeof(struct ix_queue *que)
4403 {
4404         struct adapter          *adapter = que->adapter;
4405         struct rx_ring          *rxr = que->rxr;
4406         struct ifnet            *ifp = adapter->ifp;
4407         struct lro_ctrl         *lro = &rxr->lro;
4408         struct lro_entry        *queued;
4409         int                     i, nextp, processed = 0;
4410         u32                     staterr = 0;
4411         u16                     count = rxr->process_limit;
4412         union ixgbe_adv_rx_desc *cur;
4413         struct ixgbe_rx_buf     *rbuf, *nbuf;
4414
4415         IXGBE_RX_LOCK(rxr);
4416
4417 #ifdef DEV_NETMAP
4418         /* Same as the txeof routine: wakeup clients on intr. */
4419         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4420                 IXGBE_RX_UNLOCK(rxr);
4421                 return (FALSE);
4422         }
4423 #endif /* DEV_NETMAP */
4424
4425         for (i = rxr->next_to_check; count != 0;) {
4426                 struct mbuf     *sendmp, *mp;
4427                 u32             rsc, ptype;
4428                 u16             len;
4429                 u16             vtag = 0;
4430                 bool            eop;
4431  
4432                 /* Sync the ring. */
4433                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4434                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4435
4436                 cur = &rxr->rx_base[i];
4437                 staterr = le32toh(cur->wb.upper.status_error);
4438
4439                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4440                         break;
4441                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4442                         break;
4443
4444                 count--;
4445                 sendmp = NULL;
4446                 nbuf = NULL;
4447                 rsc = 0;
4448                 cur->wb.upper.status_error = 0;
4449                 rbuf = &rxr->rx_buffers[i];
4450                 mp = rbuf->buf;
4451
4452                 len = le16toh(cur->wb.upper.length);
4453                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4454                     IXGBE_RXDADV_PKTTYPE_MASK;
4455                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4456
4457                 /* Make sure bad packets are discarded */
4458                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4459                     (rxr->discard)) {
4460                         rxr->rx_discarded++;
4461                         if (eop)
4462                                 rxr->discard = FALSE;
4463                         else
4464                                 rxr->discard = TRUE;
4465                         ixgbe_rx_discard(rxr, i);
4466                         goto next_desc;
4467                 }
4468
4469                 /*
4470                 ** On the 82599, which supports a hardware
4471                 ** LRO (called HW RSC), packets need
4472                 ** not be fragmented across sequential
4473                 ** descriptors; rather, the next descriptor
4474                 ** is indicated in bits of the descriptor.
4475                 ** This also means that we might process
4476                 ** more than one packet at a time, something
4477                 ** that had never been true before; it
4478                 ** required eliminating global chain pointers
4479                 ** in favor of what we are doing here.  -jfv
4480                 */
4481                 if (!eop) {
4482                         /*
4483                         ** Figure out the next descriptor
4484                         ** of this frame.
4485                         */
4486                         if (rxr->hw_rsc == TRUE) {
4487                                 rsc = ixgbe_rsc_count(cur);
4488                                 rxr->rsc_num += (rsc - 1);
4489                         }
4490                         if (rsc) { /* Get hardware index */
4491                                 nextp = ((staterr &
4492                                     IXGBE_RXDADV_NEXTP_MASK) >>
4493                                     IXGBE_RXDADV_NEXTP_SHIFT);
4494                         } else { /* Just sequential */
4495                                 nextp = i + 1;
4496                                 if (nextp == adapter->num_rx_desc)
4497                                         nextp = 0;
4498                         }
4499                         nbuf = &rxr->rx_buffers[nextp];
4500                         prefetch(nbuf);
4501                 }
4502                 /*
4503                 ** Rather than using the fmp/lmp global pointers
4504                 ** we now keep the head of a packet chain in the
4505                 ** buffer struct and pass this along from one
4506                 ** descriptor to the next, until we get EOP.
4507                 */
4508                 mp->m_len = len;
4509                 /*
4510                 ** See if there is a stored head; if so, this
4511                 ** buffer continues a packet chain in progress
4512                 */
4513                 sendmp = rbuf->fmp;
4514                 if (sendmp != NULL) {  /* secondary frag */
4515                         rbuf->buf = rbuf->fmp = NULL;
4516                         mp->m_flags &= ~M_PKTHDR;
4517                         sendmp->m_pkthdr.len += mp->m_len;
4518                 } else {
4519                         /*
4520                          * Optimize.  This might be a small packet,
4521                          * maybe just a TCP ACK.  Do a fast copy that
4522                          * is cache aligned into a new mbuf, and
4523                          * leave the old mbuf+cluster for re-use.
4524                          */
4525                         if (eop && len <= IXGBE_RX_COPY_LEN) {
4526                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4527                                 if (sendmp != NULL) {
4528                                         sendmp->m_data +=
4529                                             IXGBE_RX_COPY_ALIGN;
4530                                         ixgbe_bcopy(mp->m_data,
4531                                             sendmp->m_data, len);
4532                                         sendmp->m_len = len;
4533                                         rxr->rx_copies++;
4534                                         rbuf->flags |= IXGBE_RX_COPY;
4535                                 }
4536                         }
4537                         if (sendmp == NULL) {
4538                                 rbuf->buf = rbuf->fmp = NULL;
4539                                 sendmp = mp;
4540                         }
4541
4542                         /* first desc of a non-ps chain */
4543                         sendmp->m_flags |= M_PKTHDR;
4544                         sendmp->m_pkthdr.len = mp->m_len;
4545                 }
4546                 ++processed;
4547
4548                 /* Pass the head pointer on */
4549                 if (eop == 0) {
4550                         nbuf->fmp = sendmp;
4551                         sendmp = NULL;
4552                         mp->m_next = nbuf->buf;
4553                 } else { /* Sending this frame */
4554                         sendmp->m_pkthdr.rcvif = ifp;
4555                         ifp->if_ipackets++;
4556                         rxr->rx_packets++;
4557                         /* capture data for AIM */
4558                         rxr->bytes += sendmp->m_pkthdr.len;
4559                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4560                         /* Process vlan info */
4561                         if ((rxr->vtag_strip) &&
4562                             (staterr & IXGBE_RXD_STAT_VP))
4563                                 vtag = le16toh(cur->wb.upper.vlan);
4564                         if (vtag) {
4565                                 sendmp->m_pkthdr.ether_vtag = vtag;
4566                                 sendmp->m_flags |= M_VLANTAG;
4567                         }
4568                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4569                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4570 #if __FreeBSD_version >= 800000
4571                         sendmp->m_pkthdr.flowid = que->msix;
4572                         sendmp->m_flags |= M_FLOWID;
4573 #endif
4574                 }
4575 next_desc:
4576                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4577                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4578
4579                 /* Advance our pointers to the next descriptor. */
4580                 if (++i == rxr->num_desc)
4581                         i = 0;
4582
4583                 /* Now send to the stack or do LRO */
4584                 if (sendmp != NULL) {
4585                         rxr->next_to_check = i;
4586                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4587                         i = rxr->next_to_check;
4588                 }
4589
4590                 /* Every 8 descriptors we go to refresh mbufs */
4591                 if (processed == 8) {
4592                         ixgbe_refresh_mbufs(rxr, i);
4593                         processed = 0;
4594                 }
4595         }
4596
4597         /* Refresh any remaining buf structs */
4598         if (ixgbe_rx_unrefreshed(rxr))
4599                 ixgbe_refresh_mbufs(rxr, i);
4600
4601         rxr->next_to_check = i;
4602
4603         /*
4604          * Flush any outstanding LRO work
4605          */
4606         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4607                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4608                 tcp_lro_flush(lro, queued);
4609         }
4610
4611         IXGBE_RX_UNLOCK(rxr);
4612
4613         /*
4614         ** Still have cleaning to do?
4615         */
4616         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
4617                 return (TRUE);
4618         else
4619                 return (FALSE);
4620 }
4621
4622
4623 /*********************************************************************
4624  *
4625  *  Verify that the hardware indicated that the checksum is valid.
4626  *  Inform the stack about the checksum status so that the
4627  *  stack doesn't spend time verifying it.
4628  *
4629  *********************************************************************/
4630 static void
4631 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4632 {
4633         u16     status = (u16) staterr;
4634         u8      errors = (u8) (staterr >> 24);
4635         bool    sctp = FALSE;
4636
4637         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4638             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4639                 sctp = TRUE;
4640
4641         if (status & IXGBE_RXD_STAT_IPCS) {
4642                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4643                         /* IP Checksum Good */
4644                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4645                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4646
4647                 } else
4648                         mp->m_pkthdr.csum_flags = 0;
4649         }
4650         if (status & IXGBE_RXD_STAT_L4CS) {
4651                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4652 #if __FreeBSD_version >= 800000
4653                 if (sctp)
4654                         type = CSUM_SCTP_VALID;
4655 #endif
4656                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4657                         mp->m_pkthdr.csum_flags |= type;
4658                         if (!sctp)
4659                                 mp->m_pkthdr.csum_data = htons(0xffff);
4660                 } 
4661         }
4662         return;
4663 }
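/*
** A sketch of the net effect above: for a good TCP/IPv4 frame the
** stack receives
**     csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
**                  CSUM_DATA_VALID | CSUM_PSEUDO_HDR
**     csum_data  = 0xffff
** so neither the IP nor the TCP checksum is recomputed in software.
*/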
4664
4665
4666 /*
4667 ** This routine is run via a vlan config EVENT;
4668 ** it enables us to use the HW Filter table since
4669 ** we can get the vlan id. This just creates the
4670 ** entry in the soft version of the VFTA, init will
4671 ** repopulate the real table.
4672 */
4673 static void
4674 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4675 {
4676         struct adapter  *adapter = ifp->if_softc;
4677         u16             index, bit;
4678
4679         if (ifp->if_softc !=  arg)   /* Not our event */
4680                 return;
4681
4682         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4683                 return;
4684
4685         IXGBE_CORE_LOCK(adapter);
4686         index = (vtag >> 5) & 0x7F;
4687         bit = vtag & 0x1F;
4688         adapter->shadow_vfta[index] |= (1 << bit);
4689         ++adapter->num_vlans;
4690         ixgbe_init_locked(adapter);
4691         IXGBE_CORE_UNLOCK(adapter);
4692 }
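/*
** Worked example of the shadow VFTA math above: for vtag = 100,
** index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, so
** shadow_vfta[3] |= (1 << 4).  Each of the 128 32-bit words thus
** tracks 32 of the 4096 possible vlan ids.
*/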
4693
4694 /*
4695 ** This routine is run via a vlan
4696 ** unconfig EVENT; it removes our entry
4697 ** from the soft vfta.
4698 */
4699 static void
4700 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4701 {
4702         struct adapter  *adapter = ifp->if_softc;
4703         u16             index, bit;
4704
4705         if (ifp->if_softc !=  arg)
4706                 return;
4707
4708         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4709                 return;
4710
4711         IXGBE_CORE_LOCK(adapter);
4712         index = (vtag >> 5) & 0x7F;
4713         bit = vtag & 0x1F;
4714         adapter->shadow_vfta[index] &= ~(1 << bit);
4715         --adapter->num_vlans;
4716         /* Re-init to load the changes */
4717         ixgbe_init_locked(adapter);
4718         IXGBE_CORE_UNLOCK(adapter);
4719 }
4720
4721 static void
4722 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4723 {
4724         struct ifnet    *ifp = adapter->ifp;
4725         struct ixgbe_hw *hw = &adapter->hw;
4726         struct rx_ring  *rxr;
4727         u32             ctrl;
4728
4729
4730         /*
4731         ** We get here through init_locked, meaning
4732         ** a soft reset; this has already cleared
4733         ** the VFTA and other state, so if no
4734         ** vlans have been registered, do nothing.
4735         */
4736         if (adapter->num_vlans == 0)
4737                 return;
4738
4739         /*
4740         ** A soft reset zeroes out the VFTA, so
4741         ** we need to repopulate it now.
4742         */
4743         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4744                 if (adapter->shadow_vfta[i] != 0)
4745                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4746                             adapter->shadow_vfta[i]);
4747
4748         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4749         /* Enable the Filter Table if enabled */
4750         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4751                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4752                 ctrl |= IXGBE_VLNCTRL_VFE;
4753         }
4754         if (hw->mac.type == ixgbe_mac_82598EB)
4755                 ctrl |= IXGBE_VLNCTRL_VME;
4756         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4757
4758         /* Setup the queues for vlans */
4759         for (int i = 0; i < adapter->num_queues; i++) {
4760                 rxr = &adapter->rx_rings[i];
4761                 /* On 82599 the VLAN enable is per/queue in RXDCTL */
4762                 if (hw->mac.type != ixgbe_mac_82598EB) {
4763                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4764                         ctrl |= IXGBE_RXDCTL_VME;
4765                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4766                 }
4767                 rxr->vtag_strip = TRUE;
4768         }
4769 }
4770
4771 static void
4772 ixgbe_enable_intr(struct adapter *adapter)
4773 {
4774         struct ixgbe_hw *hw = &adapter->hw;
4775         struct ix_queue *que = adapter->queues;
4776         u32             mask, fwsm;
4777
4778         mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4779         /* Enable Fan Failure detection */
4780         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4781                     mask |= IXGBE_EIMS_GPI_SDP1;
4782
4783         switch (adapter->hw.mac.type) {
4784                 case ixgbe_mac_82599EB:
4785                         mask |= IXGBE_EIMS_ECC;
4786                         mask |= IXGBE_EIMS_GPI_SDP0;
4787                         mask |= IXGBE_EIMS_GPI_SDP1;
4788                         mask |= IXGBE_EIMS_GPI_SDP2;
4789 #ifdef IXGBE_FDIR
4790                         mask |= IXGBE_EIMS_FLOW_DIR;
4791 #endif
4792                         break;
4793                 case ixgbe_mac_X540:
4794                         mask |= IXGBE_EIMS_ECC;
4795                         /* Detect if Thermal Sensor is enabled */
4796                         fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
4797                         if (fwsm & IXGBE_FWSM_TS_ENABLED)
4798                                 mask |= IXGBE_EIMS_TS;
4799 #ifdef IXGBE_FDIR
4800                         mask |= IXGBE_EIMS_FLOW_DIR;
4801 #endif
4802                 /* falls through */
4803                 default:
4804                         break;
4805         }
4806
4807         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4808
4809         /* With RSS we use auto clear */
4810         if (adapter->msix_mem) {
4811                 mask = IXGBE_EIMS_ENABLE_MASK;
4812                 /* Don't autoclear Link */
4813                 mask &= ~IXGBE_EIMS_OTHER;
4814                 mask &= ~IXGBE_EIMS_LSC;
4815                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4816         }
4817
4818         /*
4819         ** Now enable all queues, this is done separately to
4820         ** allow for handling the extended (beyond 32) MSIX
4821         ** vectors that can be used by 82599
4822         */
4823         for (int i = 0; i < adapter->num_queues; i++, que++)
4824                 ixgbe_enable_queue(adapter, que->msix);
4825
4826         IXGBE_WRITE_FLUSH(hw);
4827
4828         return;
4829 }
4830
4831 static void
4832 ixgbe_disable_intr(struct adapter *adapter)
4833 {
4834         if (adapter->msix_mem)
4835                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4836         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4837                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4838         } else {
4839                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4840                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4841                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4842         }
4843         IXGBE_WRITE_FLUSH(&adapter->hw);
4844         return;
4845 }
4846
4847 u16
4848 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4849 {
4850         u16 value;
4851
4852         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4853             reg, 2);
4854
4855         return (value);
4856 }
4857
4858 void
4859 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4860 {
4861         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4862             reg, value, 2);
4863
4864         return;
4865 }
4866
4867 /*
4868 ** Get the width and transaction speed of
4869 ** the slot this adapter is plugged into.
4870 */
4871 static void
4872 ixgbe_get_slot_info(struct ixgbe_hw *hw)
4873 {
4874         device_t                dev = ((struct ixgbe_osdep *)hw->back)->dev;
4875         struct ixgbe_mac_info   *mac = &hw->mac;
4876         u16                     link;
4877         u32                     offset;
4878
4879         /* For most devices simply call the shared code routine */
4880         if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
4881                 ixgbe_get_bus_info(hw);
4882                 goto display;
4883         }
4884
4885         /*
4886         ** For the Quad port adapter we need to parse back
4887         ** up the PCI tree to find the speed of the expansion
4888         ** slot into which this adapter is plugged. A bit more work.
4889         */
4890         dev = device_get_parent(device_get_parent(dev));
4891 #ifdef IXGBE_DEBUG
4892         device_printf(dev, "parent pcib = %x,%x,%x\n",
4893             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4894 #endif
4895         dev = device_get_parent(device_get_parent(dev));
4896 #ifdef IXGBE_DEBUG
4897         device_printf(dev, "slot pcib = %x,%x,%x\n",
4898             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4899 #endif
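        /*
        ** Each device_get_parent() pair above hops from a device to
        ** its pci bus and then to the pcib that provides it; doing
        ** this twice walks past the adapter's internal PCIe switch to
        ** reach the physical slot (an assumption about the topology
        ** of this quad-port card).
        */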
4900         /* Now get the PCI Express Capabilities offset */
4901         pci_find_cap(dev, PCIY_EXPRESS, &offset);
4902         /* ...and read the Link Status Register */
4903         link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
4904         switch (link & IXGBE_PCI_LINK_WIDTH) {
4905         case IXGBE_PCI_LINK_WIDTH_1:
4906                 hw->bus.width = ixgbe_bus_width_pcie_x1;
4907                 break;
4908         case IXGBE_PCI_LINK_WIDTH_2:
4909                 hw->bus.width = ixgbe_bus_width_pcie_x2;
4910                 break;
4911         case IXGBE_PCI_LINK_WIDTH_4:
4912                 hw->bus.width = ixgbe_bus_width_pcie_x4;
4913                 break;
4914         case IXGBE_PCI_LINK_WIDTH_8:
4915                 hw->bus.width = ixgbe_bus_width_pcie_x8;
4916                 break;
4917         default:
4918                 hw->bus.width = ixgbe_bus_width_unknown;
4919                 break;
4920         }
4921
4922         switch (link & IXGBE_PCI_LINK_SPEED) {
4923         case IXGBE_PCI_LINK_SPEED_2500:
4924                 hw->bus.speed = ixgbe_bus_speed_2500;
4925                 break;
4926         case IXGBE_PCI_LINK_SPEED_5000:
4927                 hw->bus.speed = ixgbe_bus_speed_5000;
4928                 break;
4929         case IXGBE_PCI_LINK_SPEED_8000:
4930                 hw->bus.speed = ixgbe_bus_speed_8000;
4931                 break;
4932         default:
4933                 hw->bus.speed = ixgbe_bus_speed_unknown;
4934                 break;
4935         }
4936
4937         mac->ops.set_lan_id(hw);
4938
4939 display:
4940         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
4941             ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
4942             (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
4943             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
4944             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
4945             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
4946             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
4947             ("Unknown"));
4948
4949         if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4950             ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
4951             (hw->bus.speed == ixgbe_bus_speed_2500))) {
4952                 device_printf(dev, "PCI-Express bandwidth available"
4953                     " for this card\n     is not sufficient for"
4954                     " optimal performance.\n");
4955                 device_printf(dev, "For optimal performance a x8 "
4956                     "PCIE, or x4 PCIE Gen2 slot is required.\n");
4957         }
4958         if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4959             ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
4960             (hw->bus.speed < ixgbe_bus_speed_8000))) {
4961                 device_printf(dev, "PCI-Express bandwidth available"
4962                     " for this card\n     is not sufficient for"
4963                     " optimal performance.\n");
4964                 device_printf(dev, "For optimal performance a x8 "
4965                     "PCIE Gen3 slot is required.\n");
4966         }
4967
4968         return;
4969 }
4970
4971
4972 /*
4973 ** Setup the correct IVAR register for a particular MSIX interrupt
4974 **   (yes this is all very magic and confusing :)
4975 **  - entry is the register array entry
4976 **  - vector is the MSIX vector for this queue
4977 **  - type is RX/TX/MISC
4978 */
4979 static void
4980 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4981 {
4982         struct ixgbe_hw *hw = &adapter->hw;
4983         u32 ivar, index;
4984
4985         vector |= IXGBE_IVAR_ALLOC_VAL;
4986
4987         switch (hw->mac.type) {
4988
4989         case ixgbe_mac_82598EB:
4990                 if (type == -1)
4991                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4992                 else
4993                         entry += (type * 64);
4994                 index = (entry >> 2) & 0x1F;
4995                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4996                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4997                 ivar |= (vector << (8 * (entry & 0x3)));
4998                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
4999                 break;
5000
5001         case ixgbe_mac_82599EB:
5002         case ixgbe_mac_X540:
5003                 if (type == -1) { /* MISC IVAR */
5004                         index = (entry & 1) * 8;
5005                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5006                         ivar &= ~(0xFF << index);
5007                         ivar |= (vector << index);
5008                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5009                 } else {        /* RX/TX IVARS */
5010                         index = (16 * (entry & 1)) + (8 * type);
5011                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5012                         ivar &= ~(0xFF << index);
5013                         ivar |= (vector << index);
5014                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5015                 }
5016                 break;
5017         default:
5018                 break;
5019         }
5020 }
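/*
** Worked example for the 82599/X540 case above (illustrative values):
** an RX queue using entry 5 (type 0) selects register IVAR(5 >> 1) =
** IVAR(2) with index = (16 * (5 & 1)) + (8 * 0) = 16, so the vector
** (with IXGBE_IVAR_ALLOC_VAL set) lands in bits 23:16 of IVAR(2).
*/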
5021
5022 static void
5023 ixgbe_configure_ivars(struct adapter *adapter)
5024 {
5025         struct  ix_queue *que = adapter->queues;
5026         u32 newitr;
5027
5028         if (ixgbe_max_interrupt_rate > 0)
5029                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5030         else
5031                 newitr = 0;
5032
5033         for (int i = 0; i < adapter->num_queues; i++, que++) {
5034                 /* First the RX queue entry */
5035                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5036                 /* ... and the TX */
5037                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5038                 /* Set an Initial EITR value */
5039                 IXGBE_WRITE_REG(&adapter->hw,
5040                     IXGBE_EITR(que->msix), newitr);
5041         }
5042
5043         /* For the Link interrupt */
5044         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5045 }
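/*
** Example of the EITR seed above (numbers for illustration only): with
** ixgbe_max_interrupt_rate = 8000, newitr = (4000000 / 8000) & 0x0FF8
** = 500 & 0x0FF8 = 0x1F0; the computed interval is truncated to the
** 0x0FF8-aligned ITR field that the hardware expects.
*/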
5046
5047 /*
5048 ** ixgbe_sfp_probe - called in the local timer to
5049 ** determine whether a port has had optics inserted.
5050 */  
5051 static bool ixgbe_sfp_probe(struct adapter *adapter)
5052 {
5053         struct ixgbe_hw *hw = &adapter->hw;
5054         device_t        dev = adapter->dev;
5055         bool            result = FALSE;
5056
5057         if ((hw->phy.type == ixgbe_phy_nl) &&
5058             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5059                 s32 ret = hw->phy.ops.identify_sfp(hw);
5060                 if (ret)
5061                         goto out;
5062                 ret = hw->phy.ops.reset(hw);
5063                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5064                         device_printf(dev,"Unsupported SFP+ module detected!");
5065                         printf(" Reload driver with supported module.\n");
5066                         adapter->sfp_probe = FALSE;
5067                         goto out;
5068                 } else
5069                         device_printf(dev,"SFP+ module detected!\n");
5070                 /* We now have supported optics */
5071                 adapter->sfp_probe = FALSE;
5072                 /* Set the optics type so system reports correctly */
5073                 ixgbe_setup_optics(adapter);
5074                 result = TRUE;
5075         }
5076 out:
5077         return (result);
5078 }
5079
5080 /*
5081 ** Tasklet handler for MSIX Link interrupts
5082 **  - do outside interrupt since it might sleep
5083 */
5084 static void
5085 ixgbe_handle_link(void *context, int pending)
5086 {
5087         struct adapter  *adapter = context;
5088
5089         ixgbe_check_link(&adapter->hw,
5090             &adapter->link_speed, &adapter->link_up, 0);
5091         ixgbe_update_link_status(adapter);
5092 }
5093
5094 /*
5095 ** Tasklet for handling SFP module interrupts
5096 */
5097 static void
5098 ixgbe_handle_mod(void *context, int pending)
5099 {
5100         struct adapter  *adapter = context;
5101         struct ixgbe_hw *hw = &adapter->hw;
5102         device_t        dev = adapter->dev;
5103         u32 err;
5104
5105         err = hw->phy.ops.identify_sfp(hw);
5106         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5107                 device_printf(dev,
5108                     "Unsupported SFP+ module type was detected.\n");
5109                 return;
5110         }
5111         err = hw->mac.ops.setup_sfp(hw);
5112         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5113                 device_printf(dev,
5114                     "Setup failure - unsupported SFP+ module type.\n");
5115                 return;
5116         }
5117         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5118         return;
5119 }
5120
5121
5122 /*
5123 ** Tasklet for handling MSF (multispeed fiber) interrupts
5124 */
5125 static void
5126 ixgbe_handle_msf(void *context, int pending)
5127 {
5128         struct adapter  *adapter = context;
5129         struct ixgbe_hw *hw = &adapter->hw;
5130         u32 autoneg;
5131         bool negotiate;
5132
5133         autoneg = hw->phy.autoneg_advertised;
5134         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5135                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5136         if (hw->mac.ops.setup_link)
5137                 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5138         return;
5139 }
5140
5141 #ifdef IXGBE_FDIR
5142 /*
5143 ** Tasklet for reinitializing the Flow Director filter table
5144 */
5145 static void
5146 ixgbe_reinit_fdir(void *context, int pending)
5147 {
5148         struct adapter  *adapter = context;
5149         struct ifnet   *ifp = adapter->ifp;
5150
5151         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5152                 return;
5153         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5154         adapter->fdir_reinit = 0;
5155         /* re-enable flow director interrupts */
5156         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5157         /* Restart the interface */
5158         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5159         return;
5160 }
5161 #endif
5162
5163 /**********************************************************************
5164  *
5165  *  Update the board statistics counters.
5166  *
5167  **********************************************************************/
5168 static void
5169 ixgbe_update_stats_counters(struct adapter *adapter)
5170 {
5171         struct ifnet   *ifp = adapter->ifp;
5172         struct ixgbe_hw *hw = &adapter->hw;
5173         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5174         u64  total_missed_rx = 0;
5175
5176         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5177         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5178         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5179         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5180
5181         /*
5182         ** Note: these are for the 8 possible traffic classes,
5183         **       which in the current implementation are unused,
5184         **       therefore only class 0 should contain real data.
5185         */
5186         for (int i = 0; i < 8; i++) {
5187                 u32 mp;
5188                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5189                 /* missed_rx tallies misses for the gprc workaround */
5190                 missed_rx += mp;
5191                 /* global total per queue */
5192                 adapter->stats.mpc[i] += mp;
5193                 /* Running comprehensive total for stats display */
5194                 total_missed_rx += adapter->stats.mpc[i];
5195                 if (hw->mac.type == ixgbe_mac_82598EB) {
5196                         adapter->stats.rnbc[i] +=
5197                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5198                         adapter->stats.qbtc[i] +=
5199                             IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5200                         adapter->stats.qbrc[i] +=
5201                             IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5202                         adapter->stats.pxonrxc[i] +=
5203                             IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5204                 } else
5205                         adapter->stats.pxonrxc[i] +=
5206                             IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5207                 adapter->stats.pxontxc[i] +=
5208                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5209                 adapter->stats.pxofftxc[i] +=
5210                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5211                 adapter->stats.pxoffrxc[i] +=
5212                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5213                 adapter->stats.pxon2offc[i] +=
5214                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5215         }
5216         for (int i = 0; i < 16; i++) {
5217                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5218                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5219                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5220         }
5221         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5222         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5223         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5224
5225         /* Hardware workaround, gprc counts missed packets */
5226         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5227         adapter->stats.gprc -= missed_rx;
5228
5229         if (hw->mac.type != ixgbe_mac_82598EB) {
5230                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5231                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5232                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5233                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5234                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5235                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5236                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5237                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5238         } else {
5239                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5240                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5241                 /* 82598 only has a counter in the high register */
5242                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5243                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5244                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5245         }
5246
5247         /*
5248          * Workaround: mprc hardware is incorrectly counting
5249          * broadcasts, so for now we subtract those.
5250          */
5251         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5252         adapter->stats.bprc += bprc;
5253         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5254         if (hw->mac.type == ixgbe_mac_82598EB)
5255                 adapter->stats.mprc -= bprc;
5256
5257         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5258         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5259         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5260         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5261         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5262         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5263
5264         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5265         adapter->stats.lxontxc += lxon;
5266         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5267         adapter->stats.lxofftxc += lxoff;
5268         total = lxon + lxoff;
5269
5270         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5271         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5272         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5273         adapter->stats.gptc -= total;
5274         adapter->stats.mptc -= total;
5275         adapter->stats.ptc64 -= total;
5276         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5277
5278         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5279         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5280         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5281         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5282         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5283         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5284         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5285         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5286         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5287         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5288         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5289         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5290         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5291         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5292         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5293         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5294         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5295         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5296         /* Only read FCOE stats on 82599 and newer MACs */
5297         if (hw->mac.type != ixgbe_mac_82598EB) {
5298                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5299                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5300                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5301                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5302                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5303         }
5304
5305         /* Fill out the OS statistics structure */
5306         ifp->if_ipackets = adapter->stats.gprc;
5307         ifp->if_opackets = adapter->stats.gptc;
5308         ifp->if_ibytes = adapter->stats.gorc;
5309         ifp->if_obytes = adapter->stats.gotc;
5310         ifp->if_imcasts = adapter->stats.mprc;
5311         ifp->if_omcasts = adapter->stats.mptc;
5312         ifp->if_collisions = 0;
5313
5314         /* Rx Errors */
5315         ifp->if_iqdrops = total_missed_rx;
5316         ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
5317 }
5318
5319 /** ixgbe_sysctl_tdh_handler - Handler function
5320  *  Retrieves the TDH value from the hardware
5321  */
5322 static int 
5323 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5324 {
5325         int error;
5326
5327         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5328         if (!txr) return 0;
5329
5330         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5331         error = sysctl_handle_int(oidp, &val, 0, req);
5332         if (error || !req->newptr)
5333                 return error;
5334         return 0;
5335 }
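/*
** These descriptor head/tail sysctls are read-only diagnostics; a
** hypothetical invocation (unit and queue numbers assumed) would be:
**     sysctl dev.ix.0.queue0.txd_head
*/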
5336
5337 /** ixgbe_sysctl_tdt_handler - Handler function
5338  *  Retrieves the TDT value from the hardware
5339  */
5340 static int 
5341 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5342 {
5343         int error;
5344
5345         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5346         if (!txr) return 0;
5347
5348         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5349         error = sysctl_handle_int(oidp, &val, 0, req);
5350         if (error || !req->newptr)
5351                 return error;
5352         return 0;
5353 }
5354
5355 /** ixgbe_sysctl_rdh_handler - Handler function
5356  *  Retrieves the RDH value from the hardware
5357  */
5358 static int 
5359 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5360 {
5361         int error;
5362
5363         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5364         if (!rxr) return 0;
5365
5366         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5367         error = sysctl_handle_int(oidp, &val, 0, req);
5368         if (error || !req->newptr)
5369                 return error;
5370         return 0;
5371 }
5372
5373 /** ixgbe_sysctl_rdt_handler - Handler function
5374  *  Retrieves the RDT value from the hardware
5375  */
5376 static int 
5377 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5378 {
5379         int error;
5380
5381         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5382         if (!rxr) return 0;
5383
5384         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5385         error = sysctl_handle_int(oidp, &val, 0, req);
5386         if (error || !req->newptr)
5387                 return error;
5388         return 0;
5389 }
5390
5391 static int
5392 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5393 {
5394         int error;
5395         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5396         unsigned int reg, usec, rate;
5397
5398         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5399         usec = ((reg & 0x0FF8) >> 3);
5400         if (usec > 0)
5401                 rate = 500000 / usec;
5402         else
5403                 rate = 0;
5404         error = sysctl_handle_int(oidp, &rate, 0, req);
5405         if (error || !req->newptr)
5406                 return error;
5407         reg &= ~0xfff; /* default, no limitation */
5408         ixgbe_max_interrupt_rate = 0;
5409         if (rate > 0 && rate < 500000) {
5410                 if (rate < 1000)
5411                         rate = 1000;
5412                 ixgbe_max_interrupt_rate = rate;
5413                 reg |= ((4000000/rate) & 0xff8);
5414         }
5415         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5416         return 0;
5417 }
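/*
** Worked example (illustrative numbers): if EITR holds 0x1F0, then
** usec = (0x1F0 & 0x0FF8) >> 3 = 62 and the reported rate is
** 500000 / 62 ~= 8064 interrupts/sec; writing 8000 back through this
** sysctl reprograms the register with ((4000000 / 8000) & 0xff8).
*/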
5418
5419 /*
5420  * Add sysctl variables, one per statistic, to the system.
5421  */
5422 static void
5423 ixgbe_add_hw_stats(struct adapter *adapter)
5424 {
5425
5426         device_t dev = adapter->dev;
5427
5428         struct tx_ring *txr = adapter->tx_rings;
5429         struct rx_ring *rxr = adapter->rx_rings;
5430
5431         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5432         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5433         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5434         struct ixgbe_hw_stats *stats = &adapter->stats;
5435
5436         struct sysctl_oid *stat_node, *queue_node;
5437         struct sysctl_oid_list *stat_list, *queue_list;
5438
5439 #define QUEUE_NAME_LEN 32
5440         char namebuf[QUEUE_NAME_LEN];
5441
5442         /* Driver Statistics */
5443         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5444                         CTLFLAG_RD, &adapter->dropped_pkts,
5445                         "Driver dropped packets");
5446         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5447                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5448                         "m_defrag() failed");
5449         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5450                         CTLFLAG_RD, &adapter->watchdog_events,
5451                         "Watchdog timeouts");
5452         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5453                         CTLFLAG_RD, &adapter->link_irq,
5454                         "Link MSIX IRQ Handled");
5455
5456         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5457                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5458                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5459                                             CTLFLAG_RD, NULL, "Queue Name");
5460                 queue_list = SYSCTL_CHILDREN(queue_node);
5461
5462                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5463                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5464                                 sizeof(&adapter->queues[i]),
5465                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5466                                 "Interrupt Rate");
5467                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5468                                 CTLFLAG_RD, &(adapter->queues[i].irqs),
5469                                 "irqs on this queue");
5470                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5471                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5472                                 ixgbe_sysctl_tdh_handler, "IU",
5473                                 "Transmit Descriptor Head");
5474                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5475                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5476                                 ixgbe_sysctl_tdt_handler, "IU",
5477                                 "Transmit Descriptor Tail");
5478                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
5479                                 CTLFLAG_RD, &txr->tso_tx,
5480                                 "TSO");
5481                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
5482                                 CTLFLAG_RD, &txr->no_tx_dma_setup,
5483                                 "Driver tx dma failure in xmit");
5484                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5485                                 CTLFLAG_RD, &txr->no_desc_avail,
5486                                 "Queue No Descriptor Available");
5487                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5488                                 CTLFLAG_RD, &txr->total_packets,
5489                                 "Queue Packets Transmitted");
5490         }
5491
5492         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5493                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5494                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5495                                             CTLFLAG_RD, NULL, "Queue Name");
5496                 queue_list = SYSCTL_CHILDREN(queue_node);
5497
5498                 struct lro_ctrl *lro = &rxr->lro;
5504
5505                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5506                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5507                                 ixgbe_sysctl_rdh_handler, "IU",
5508                                 "Receive Descriptor Head");
5509                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5510                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5511                                 ixgbe_sysctl_rdt_handler, "IU",
5512                                 "Receive Descriptor Tail");
5513                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5514                                 CTLFLAG_RD, &rxr->rx_packets,
5515                                 "Queue Packets Received");
5516                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5517                                 CTLFLAG_RD, &rxr->rx_bytes,
5518                                 "Queue Bytes Received");
5519                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
5520                                 CTLFLAG_RD, &rxr->rx_copies,
5521                                 "Copied RX Frames");
5522                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5523                                 CTLFLAG_RD, &lro->lro_queued, 0,
5524                                 "LRO Queued");
5525                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5526                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5527                                 "LRO Flushed");
5528         }
5529
5530         /* MAC stats get their own sub node */
5531
5532         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5533                                     CTLFLAG_RD, NULL, "MAC Statistics");
5534         stat_list = SYSCTL_CHILDREN(stat_node);
5535
5536         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5537                         CTLFLAG_RD, &stats->crcerrs,
5538                         "CRC Errors");
5539         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5540                         CTLFLAG_RD, &stats->illerrc,
5541                         "Illegal Byte Errors");
5542         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5543                         CTLFLAG_RD, &stats->errbc,
5544                         "Byte Errors");
5545         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5546                         CTLFLAG_RD, &stats->mspdc,
5547                         "MAC Short Packets Discarded");
5548         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5549                         CTLFLAG_RD, &stats->mlfc,
5550                         "MAC Local Faults");
5551         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5552                         CTLFLAG_RD, &stats->mrfc,
5553                         "MAC Remote Faults");
5554         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5555                         CTLFLAG_RD, &stats->rlec,
5556                         "Receive Length Errors");
5557
5558         /* Flow Control stats */
5559         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5560                         CTLFLAG_RD, &stats->lxontxc,
5561                         "Link XON Transmitted");
5562         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5563                         CTLFLAG_RD, &stats->lxonrxc,
5564                         "Link XON Received");
5565         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5566                         CTLFLAG_RD, &stats->lxofftxc,
5567                         "Link XOFF Transmitted");
5568         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5569                         CTLFLAG_RD, &stats->lxoffrxc,
5570                         "Link XOFF Received");
5571
5572         /* Packet Reception Stats */
5573         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5574                         CTLFLAG_RD, &stats->tor, 
5575                         "Total Octets Received"); 
5576         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5577                         CTLFLAG_RD, &stats->gorc, 
5578                         "Good Octets Received"); 
5579         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5580                         CTLFLAG_RD, &stats->tpr,
5581                         "Total Packets Received");
5582         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5583                         CTLFLAG_RD, &stats->gprc,
5584                         "Good Packets Received");
5585         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5586                         CTLFLAG_RD, &stats->mprc,
5587                         "Multicast Packets Received");
5588         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5589                         CTLFLAG_RD, &stats->bprc,
5590                         "Broadcast Packets Received");
5591         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5592                         CTLFLAG_RD, &stats->prc64,
5593                         "64 byte frames received");
5594         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5595                         CTLFLAG_RD, &stats->prc127,
5596                         "65-127 byte frames received");
5597         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5598                         CTLFLAG_RD, &stats->prc255,
5599                         "128-255 byte frames received");
5600         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5601                         CTLFLAG_RD, &stats->prc511,
5602                         "256-511 byte frames received");
5603         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5604                         CTLFLAG_RD, &stats->prc1023,
5605                         "512-1023 byte frames received");
5606         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5607                         CTLFLAG_RD, &stats->prc1522,
5608                         "1024-1522 byte frames received");
5609         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5610                         CTLFLAG_RD, &stats->ruc,
5611                         "Receive Undersized");
5612         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5613                         CTLFLAG_RD, &stats->rfc,
5614                         "Fragmented Packets Received");
5615         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5616                         CTLFLAG_RD, &stats->roc,
5617                         "Oversized Packets Received");
5618         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5619                         CTLFLAG_RD, &stats->rjc,
5620                         "Received Jabber");
5621         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5622                         CTLFLAG_RD, &stats->mngprc,
5623                         "Management Packets Received");
5624         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5625                         CTLFLAG_RD, &stats->mngpdc,
5626                         "Management Packets Dropped");
5627         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5628                         CTLFLAG_RD, &stats->xec,
5629                         "Checksum Errors");
5630
5631         /* Packet Transmission Stats */
5632         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5633                         CTLFLAG_RD, &stats->gotc, 
5634                         "Good Octets Transmitted"); 
5635         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5636                         CTLFLAG_RD, &stats->tpt,
5637                         "Total Packets Transmitted");
5638         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5639                         CTLFLAG_RD, &stats->gptc,
5640                         "Good Packets Transmitted");
5641         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5642                         CTLFLAG_RD, &stats->bptc,
5643                         "Broadcast Packets Transmitted");
5644         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5645                         CTLFLAG_RD, &stats->mptc,
5646                         "Multicast Packets Transmitted");
5647         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5648                         CTLFLAG_RD, &stats->mngptc,
5649                         "Management Packets Transmitted");
5650         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5651                         CTLFLAG_RD, &stats->ptc64,
5652                         "64 byte frames transmitted");
5653         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5654                         CTLFLAG_RD, &stats->ptc127,
5655                         "65-127 byte frames transmitted");
5656         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5657                         CTLFLAG_RD, &stats->ptc255,
5658                         "128-255 byte frames transmitted");
5659         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5660                         CTLFLAG_RD, &stats->ptc511,
5661                         "256-511 byte frames transmitted");
5662         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5663                         CTLFLAG_RD, &stats->ptc1023,
5664                         "512-1023 byte frames transmitted");
5665         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5666                         CTLFLAG_RD, &stats->ptc1522,
5667                         "1024-1522 byte frames transmitted");
5668 }
5669
5670 /*
5671 ** Set flow control using sysctl:
5672 ** Flow control values:
5673 **      0 - off
5674 **      1 - rx pause
5675 **      2 - tx pause
5676 **      3 - full
5677 */
5678 static int
5679 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5680 {
5681         int error, last;
5682         struct adapter *adapter = (struct adapter *) arg1;
5683
5684         last = adapter->fc;
5685         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5686         if ((error) || (req->newptr == NULL))
5687                 return (error);
5688
5689         /* Don't bother if it's not changed */
5690         if (adapter->fc == last)
5691                 return (0);
5692
5693         switch (adapter->fc) {
5694                 case ixgbe_fc_rx_pause:
5695                 case ixgbe_fc_tx_pause:
5696                 case ixgbe_fc_full:
5697                         adapter->hw.fc.requested_mode = adapter->fc;
5698                         if (adapter->num_queues > 1)
5699                                 ixgbe_disable_rx_drop(adapter);
5700                         break;
5701                 case ixgbe_fc_none:
5702                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5703                         if (adapter->num_queues > 1)
5704                                 ixgbe_enable_rx_drop(adapter);
5705                         break;
5706                 default:
5707                         adapter->fc = last;
5708                         return (EINVAL);
5709         }
5710         /* Don't autoneg if forcing a value */
5711         adapter->hw.fc.disable_fc_autoneg = TRUE;
5712         ixgbe_fc_enable(&adapter->hw);
5713         return (error);
5714 }
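/*
** Example: if this handler is attached as a "fc" leaf under the
** device tree (OID name illustrative), flow control is tunable at
** runtime:
**
**      # sysctl dev.ix.0.fc=3     request full rx/tx pause
**      # sysctl dev.ix.0.fc=0     disable pause; with multiple
**                                 queues this re-enables RX drop
*/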
5715
5716 /*
5717 ** Control link advertise speed:
5718 **      1 - advertise only 1G
5719 **      2 - advertise 100Mb
5720 **      3 - advertise normal (1G + 10G)
5721 */
5722 static int
5723 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5724 {
5725         int                     error = 0;
5726         struct adapter          *adapter;
5727         device_t                dev;
5728         struct ixgbe_hw         *hw;
5729         ixgbe_link_speed        speed, last;
5730
5731         adapter = (struct adapter *) arg1;
5732         dev = adapter->dev;
5733         hw = &adapter->hw;
5734         last = adapter->advertise;
5735
5736         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5737         if ((error) || (req->newptr == NULL))
5738                 return (error);
5739
5740         if (adapter->advertise == last) /* no change */
5741                 return (0);
5742
5743         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5744             (hw->phy.multispeed_fiber)))
5745                 return (EINVAL);
5746
5747         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5748                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5749                 return (EINVAL);
5750         }
5751
5752         if (adapter->advertise == 1)
5753                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5754         else if (adapter->advertise == 2)
5755                 speed = IXGBE_LINK_SPEED_100_FULL;
5756         else if (adapter->advertise == 3)
5757                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5758                         IXGBE_LINK_SPEED_10GB_FULL;
5759         else {  /* bogus value */
5760                 adapter->advertise = last;
5761                 return (EINVAL);
5762         }
5763
5764         hw->mac.autotry_restart = TRUE;
5765         hw->mac.ops.setup_link(hw, speed, TRUE);
5766
5767         return (error);
5768 }
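/*
** Example (OID name illustrative), valid only on copper or
** multispeed fiber ports:
**
**      # sysctl dev.ix.0.advertise_speed=1    1G only
**      # sysctl dev.ix.0.advertise_speed=2    100Mb (X540 only)
**      # sysctl dev.ix.0.advertise_speed=3    normal (1G + 10G)
*/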
5769
5770 /*
5771 ** Thermal Shutdown Trigger
5772 **   - cause a Thermal Overtemp IRQ
5773 **   - requires thermal sensor support enabled by firmware
5774 */
5775 static int
5776 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5777 {
5778         int             error, fire = 0;
5779         struct adapter  *adapter = (struct adapter *) arg1;
5780         struct ixgbe_hw *hw = &adapter->hw;
5781
5783         if (hw->mac.type != ixgbe_mac_X540)
5784                 return (0);
5785
5786         error = sysctl_handle_int(oidp, &fire, 0, req);
5787         if ((error) || (req->newptr == NULL))
5788                 return (error);
5789
5790         if (fire) {
5791                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5792                 reg |= IXGBE_EICR_TS;
5793                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5794         }
5795
5796         return (0);
5797 }
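/*
** Example (OID name illustrative): any nonzero write sets the
** thermal sensor bit in EICS, simulating an overtemp interrupt on
** X540 adapters:
**
**      # sysctl dev.ix.0.ts=1
*/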
5798
5799 /*
5800 ** Enable the hardware to drop packets when the buffer is
5801 ** full. This is useful when multiqueue, so that no single
5802 ** queue being full stalls the entire RX engine. We only
5803 ** enable this when Multiqueue AND when Flow Control is 
5804 ** disabled.
5805 */
5806 static void
5807 ixgbe_enable_rx_drop(struct adapter *adapter)
5808 {
5809         struct ixgbe_hw *hw = &adapter->hw;
5810
5811         for (int i = 0; i < adapter->num_queues; i++) {
5812                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5813                 srrctl |= IXGBE_SRRCTL_DROP_EN;
5814                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5815         }
5816 }
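/*
** The inverse of the above; called from ixgbe_set_flowcntl() when a
** pause mode is requested, since per-queue drop and link flow
** control should not be active at the same time.
*/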
5817
5818 static void
5819 ixgbe_disable_rx_drop(struct adapter *adapter)
5820 {
5821         struct ixgbe_hw *hw = &adapter->hw;
5822
5823         for (int i = 0; i < adapter->num_queues; i++) {
5824                 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
5825                 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
5826                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
5827         }
5828 }