/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#include "ixgbe.h"

#ifdef  RSS
#include <netinet/in_rss.h>
#endif

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char ixgbe_driver_version[] = "2.5.15";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
        {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
        /* required last entry */
        {0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static char    *ixgbe_strings[] = {
        "Intel(R) PRO/10GbE PCI-Express Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t);
static int      ixgbe_attach(device_t);
static int      ixgbe_detach(device_t);
static int      ixgbe_shutdown(device_t);
#ifdef IXGBE_LEGACY_TX
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#else /* ! IXGBE_LEGACY_TX */
static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int      ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
static void     ixgbe_qflush(struct ifnet *);
static void     ixgbe_deferred_mq_start(void *, int);
#endif /* IXGBE_LEGACY_TX */
static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
static void     ixgbe_init(void *);
static void     ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *);
static void     ixgbe_get_slot_info(struct ixgbe_hw *);
static int      ixgbe_allocate_msix(struct adapter *);
static int      ixgbe_allocate_legacy(struct adapter *);
static int      ixgbe_allocate_queues(struct adapter *);
static int      ixgbe_setup_msix(struct adapter *);
static void     ixgbe_free_pci_resources(struct adapter *);
static void     ixgbe_local_timer(void *);
static int      ixgbe_setup_interface(device_t, struct adapter *);
static void     ixgbe_config_link(struct adapter *);

static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int      ixgbe_setup_transmit_structures(struct adapter *);
static void     ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int      ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
static void     ixgbe_setup_hw_rsc(struct rx_ring *);

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void     ixgbe_txeof(struct tx_ring *);
static bool     ixgbe_rxeof(struct ix_queue *);
static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
                    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static int      ixgbe_tx_ctx_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static int      ixgbe_tso_setup(struct tx_ring *,
                    struct mbuf *, u32 *, u32 *);
static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void     ixgbe_configure_ivars(struct adapter *);
static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void     ixgbe_setup_vlan_hw_support(struct adapter *);
static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                    struct mbuf *, u32);

static void     ixgbe_enable_rx_drop(struct adapter *);
static void     ixgbe_disable_rx_drop(struct adapter *);

/* Support for pluggable optic modules */
static bool     ixgbe_sfp_probe(struct adapter *);
static void     ixgbe_setup_optics(struct adapter *);
/* Legacy (single vector) interrupt handler */
static void     ixgbe_legacy_irq(void *);

/* The MSI/X Interrupt handlers */
static void     ixgbe_msix_que(void *);
static void     ixgbe_msix_link(void *);

/* Deferred interrupt tasklets */
static void     ixgbe_handle_que(void *, int);
static void     ixgbe_handle_link(void *, int);
static void     ixgbe_handle_msf(void *, int);
static void     ixgbe_handle_mod(void *, int);

#ifdef IXGBE_FDIR
static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
static void     ixgbe_reinit_fdir(void *, int);
#endif

/* Missing shared code prototype */
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ixgbe_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, ixgbe_probe),
        DEVMETHOD(device_attach, ixgbe_attach),
        DEVMETHOD(device_detach, ixgbe_detach),
        DEVMETHOD(device_shutdown, ixgbe_shutdown),
        DEVMETHOD_END
};

static driver_t ixgbe_driver = {
        "ix", ixgbe_methods, sizeof(struct adapter),
};

devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);

/*
** TUNEABLE PARAMETERS:
*/

static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
                   "IXGBE driver parameters");

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int ixgbe_enable_aim = TRUE;
SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0,
    "Enable adaptive interrupt moderation");

static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &ixgbe_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, "
    "-1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &ixgbe_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, "
    "-1 means unlimited");

/*
** Smart speed setting, default to on.
** This only works as a compile option
** right now, as it is applied during attach;
** set this to 'ixgbe_smart_speed_off' to
** disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
 * Number of Queues, can be set to 0,
 * it then autoconfigures based on the
 * number of cpus with a max of 8. This
 * can be overridden manually here.
 */
static int ixgbe_num_queues = 0;
SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Number of TX descriptors per ring,
** setting higher than RX as this seems
** the better performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
    "Number of transmit descriptors per queue");

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
    "Number of receive descriptors per queue");

/*
** Defining this on will allow the use
** of unsupported SFP+ modules; note that
** if you do so, you are on your own :)
*/
static int allow_unsupported_sfp = FALSE;
TUNABLE_INT("hw.ixgbe.unsupported_sfp", &allow_unsupported_sfp);
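
/*
** Example: since the knobs above are loader tunables (CTLFLAG_RDTUN
** under hw.ix, plus the TUNABLE_INT just above), they can be set from
** /boot/loader.conf before the driver loads. Illustrative values only,
** not recommendations:
**
**   hw.ix.enable_msix="1"
**   hw.ix.num_queues="4"
**   hw.ix.txd="2048"
**   hw.ix.rxd="2048"
**   hw.ixgbe.unsupported_sfp="1"
*/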

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  Also, it will cause IP forwarding to
**  fail, and that can't be controlled by
**  the stack the way LRO can. For all these
**  reasons I've deemed it best to leave
**  this off and not bother with a tuneable
**  interface; enabling it requires a
**  recompile.
*/
static bool ixgbe_rsc_enable = FALSE;

/* Keep running tab on them for sanity check */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; at the default
** of 20, every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool; this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif

#ifdef DEV_NETMAP
/*
 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
 * be a reference on how to implement netmap support in a driver.
 * Additional comments are in ixgbe_netmap.h .
 *
 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
 * that extend the standard driver.
 */
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
ixgbe_probe(device_t dev)
{
        ixgbe_vendor_info_t *ent;

        u16     pci_vendor_id = 0;
        u16     pci_device_id = 0;
        u16     pci_subvendor_id = 0;
        u16     pci_subdevice_id = 0;
        char    adapter_name[256];

        INIT_DEBUGOUT("ixgbe_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = ixgbe_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                     (ent->subvendor_id == 0)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                     (ent->subdevice_id == 0))) {
                        sprintf(adapter_name, "%s, Version - %s",
                                ixgbe_strings[ent->index],
                                ixgbe_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        ++ixgbe_total_ports;
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }
        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_attach(device_t dev)
{
        struct adapter *adapter;
        struct ixgbe_hw *hw;
        int             error = 0;
        u16             csum;
        u32             ctrl_ext;

        INIT_DEBUGOUT("ixgbe_attach: begin");

        /* Allocate, clear, and link in our adapter structure */
        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;

        /* Core Lock Init */
        IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL APIs */

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");

        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
                        &ixgbe_enable_aim, 1, "Interrupt Moderation");

        /*
        ** Allow a kind of speed control by forcing the autoneg
        ** advertised speed list to only a certain value, this
        ** supports 1G on 82599 devices, and 100Mb on x540.
        */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
                        adapter, 0, ixgbe_set_advertise, "I", "Link Speed");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
                        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
                        OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
                        0, ixgbe_set_thermal_test, "I", "Thermal Test");

        /* Set up the timer callout */
        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware revision */
        ixgbe_identify_hardware(adapter);

        /* Do base PCI setup - map BAR0 */
        if (ixgbe_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_out;
        }

        /* Do descriptor calc and sanity checks */
        if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
                device_printf(dev, "TXD config issue, using default!\n");
                adapter->num_tx_desc = DEFAULT_TXD;
        } else
                adapter->num_tx_desc = ixgbe_txd;

        /*
        ** With many RX rings it is easy to exceed the
        ** system mbuf allocation. Tuning nmbclusters
        ** can alleviate this.
        */
        if (nmbclusters > 0) {
                int s;
                s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
                if (s > nmbclusters) {
                        device_printf(dev, "RX Descriptors exceed "
                            "system mbuf max, using default instead!\n");
                        ixgbe_rxd = DEFAULT_RXD;
                }
        }

        if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
            ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
                device_printf(dev, "RXD config issue, using default!\n");
                adapter->num_rx_desc = DEFAULT_RXD;
        } else
                adapter->num_rx_desc = ixgbe_rxd;
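
        /*
        ** Worked example for the checks above (illustrative, assuming
        ** the DBA_ALIGN of 128 defined in ixgbe.h): the advanced
        ** TX/RX descriptors are 16 bytes each, so the ring byte size
        ** is aligned whenever the count is a multiple of 8. For
        ** example, 2048 descriptors * 16 bytes = 32768 bytes, which
        ** is 128-byte aligned and within [MIN_TXD, MAX_TXD], so the
        ** tunable value is accepted as-is.
        */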

        /* Allocate our TX/RX Queues */
        if (ixgbe_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_out;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Initialize the shared code */
        hw->allow_unsupported_sfp = allow_unsupported_sfp;
        error = ixgbe_init_shared_code(hw);
        if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
                /*
                ** No optics in this port, set up
                ** so the timer routine will probe
                ** for later insertion.
                */
                adapter->sfp_probe = TRUE;
                error = 0;
        } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                device_printf(dev, "Unsupported SFP+ module detected!\n");
                error = EIO;
                goto err_late;
        } else if (error) {
                device_printf(dev, "Unable to initialize the shared code\n");
                error = EIO;
                goto err_late;
        }

        /* Make sure we have a good EEPROM before we read from it */
        if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
                device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
                error = EIO;
                goto err_late;
        }

        error = ixgbe_init_hw(hw);
        switch (error) {
        case IXGBE_ERR_EEPROM_VERSION:
                device_printf(dev, "This device is a pre-production adapter/"
                    "LOM.  Please be aware there may be issues associated "
                    "with your hardware.\n If you are experiencing problems "
                    "please contact your Intel or hardware representative "
                    "who provided you with this hardware.\n");
                break;
        case IXGBE_ERR_SFP_NOT_SUPPORTED:
                device_printf(dev, "Unsupported SFP+ Module\n");
                error = EIO;
                goto err_late;
        case IXGBE_ERR_SFP_NOT_PRESENT:
                device_printf(dev, "No SFP+ Module found\n");
                /* falls thru */
        default:
                break;
        }

        /* Detect and set physical type */
        ixgbe_setup_optics(adapter);

        if ((adapter->msix > 1) && (ixgbe_enable_msix))
                error = ixgbe_allocate_msix(adapter);
        else
                error = ixgbe_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        if (ixgbe_setup_interface(dev, adapter) != 0)
                goto err_late;

        /* Initialize statistics */
        ixgbe_update_stats_counters(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /*
        ** Check PCIE slot type/speed/width
        */
        ixgbe_get_slot_info(hw);

        /* Set an initial default flow control value */
        adapter->fc = ixgbe_fc_full;

        /* let hardware know driver is loaded */
        ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
        ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

        ixgbe_add_hw_stats(adapter);

#ifdef DEV_NETMAP
        ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
        INIT_DEBUGOUT("ixgbe_attach: end");
        return (0);
err_late:
        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
err_out:
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
        ixgbe_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_detach(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ix_queue *que = adapter->queues;
        struct tx_ring *txr = adapter->tx_rings;
        u32     ctrl_ext;

        INIT_DEBUGOUT("ixgbe_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);

        for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
                if (que->tq) {
#ifndef IXGBE_LEGACY_TX
                        taskqueue_drain(que->tq, &txr->txq_task);
#endif
                        taskqueue_drain(que->tq, &que->que_task);
                        taskqueue_free(que->tq);
                }
        }

        /* Drain the Link queue */
        if (adapter->tq) {
                taskqueue_drain(adapter->tq, &adapter->link_task);
                taskqueue_drain(adapter->tq, &adapter->mod_task);
                taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
                taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
                taskqueue_free(adapter->tq);
        }

        /* let hardware know driver is unloading */
        ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
        ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);
#ifdef DEV_NETMAP
        netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
        ixgbe_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(adapter->ifp);

        ixgbe_free_transmit_structures(adapter);
        ixgbe_free_receive_structures(adapter);
        free(adapter->mta, M_DEVBUF);

        IXGBE_CORE_LOCK_DESTROY(adapter);
        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
ixgbe_shutdown(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        IXGBE_CORE_LOCK(adapter);
        ixgbe_stop(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return (0);
}

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct mbuf    *m_head;
        struct adapter *adapter = txr->adapter;

        IXGBE_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                return;
        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
                        break;

                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;

                if (ixgbe_xmit(txr, &m_head)) {
                        if (m_head != NULL)
                                IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_time = ticks;
                txr->queue_status = IXGBE_QUEUE_WORKING;
        }
        return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
        struct adapter *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                IXGBE_TX_LOCK(txr);
                ixgbe_start_locked(txr, ifp);
                IXGBE_TX_UNLOCK(txr);
        }
        return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
**
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ix_queue *que;
        struct tx_ring  *txr;
        int             i, err = 0;
#ifdef  RSS
        uint32_t bucket_id;
#endif

        /* Which queue to use */
        /*
         * When doing RSS, map it to the same outbound queue
         * as the incoming flow would be mapped to.
         *
         * If everything is set up correctly, it should match
         * the RSS bucket of the CPU we are currently running on.
         */
        if ((m->m_flags & M_FLOWID) != 0) {
#ifdef  RSS
                if (rss_hash2bucket(m->m_pkthdr.flowid,
                    M_HASHTYPE_GET(m), &bucket_id) == 0) {
                        /* XXX TODO: spit out something if bucket_id > num_queues? */
                        i = bucket_id % adapter->num_queues;
                } else {
#endif
                        i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef  RSS
                }
#endif
        } else {
                i = curcpu % adapter->num_queues;
        }
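
        /*
         * Worked example of the selection above (illustrative numbers):
         * with adapter->num_queues = 8, a flow whose RSS hash falls in
         * bucket 11 is transmitted on queue 11 % 8 = 3, while a packet
         * with no flowid sent from CPU 5 uses queue 5 % 8 = 5.
         */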

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        err = drbr_enqueue(ifp, txr->br, m);
        if (err)
                return (err);
        if (IXGBE_TX_TRYLOCK(txr)) {
                ixgbe_mq_start_locked(ifp, txr);
                IXGBE_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(que->tq, &txr->txq_task);

        return (0);
}

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             enqueued = 0, err = 0;

        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0)
                return (ENETDOWN);

        /* Process the queue */
#if __FreeBSD_version < 901504
        next = drbr_dequeue(ifp, txr->br);
        while (next != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
#else
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                drbr_advance(ifp, txr->br);
                        } else {
                                drbr_putback(ifp, txr->br, next);
                        }
#endif
                        break;
                }
#if __FreeBSD_version >= 901504
                drbr_advance(ifp, txr->br);
#endif
                enqueued++;
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
#if __FreeBSD_version < 901504
                next = drbr_dequeue(ifp, txr->br);
#endif
        }

        if (enqueued > 0) {
                /* Set watchdog on */
                txr->queue_status = IXGBE_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
                ixgbe_txeof(txr);

        return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
ixgbe_deferred_mq_start(void *arg, int pending)
{
        struct tx_ring *txr = arg;
        struct adapter *adapter = txr->adapter;
        struct ifnet *ifp = adapter->ifp;

        IXGBE_TX_LOCK(txr);
        if (!drbr_empty(ifp, txr->br))
                ixgbe_mq_start_locked(ifp, txr);
        IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IXGBE_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IXGBE_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ixgbe_hw *hw = &adapter->hw;
        struct ifreq    *ifr = (struct ifreq *) data;
#if defined(INET) || defined(INET6)
        struct ifaddr *ifa = (struct ifaddr *)data;
        bool            avoid_reset = FALSE;
#endif
        int             error = 0;

        switch (command) {

        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
#if defined(INET) || defined(INET6)
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                ixgbe_init(adapter);
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
                        error = ether_ioctl(ifp, command, data);
#endif
                break;
        case SIOCSIFMTU:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
                if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
                        error = EINVAL;
                } else {
                        IXGBE_CORE_LOCK(adapter);
                        ifp->if_mtu = ifr->ifr_mtu;
                        adapter->max_frame_size =
                                ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
                IXGBE_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        ixgbe_set_promisc(adapter);
                                }
                        } else
                                ixgbe_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                ixgbe_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IXGBE_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_disable_intr(adapter);
                        ixgbe_set_multi(adapter);
                        ixgbe_enable_intr(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
        {
                int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
                if (mask & IFCAP_HWCSUM)
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                if (mask & IFCAP_TSO4)
                        ifp->if_capenable ^= IFCAP_TSO4;
                if (mask & IFCAP_TSO6)
                        ifp->if_capenable ^= IFCAP_TSO6;
                if (mask & IFCAP_LRO)
                        ifp->if_capenable ^= IFCAP_LRO;
                if (mask & IFCAP_VLAN_HWTAGGING)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                if (mask & IFCAP_VLAN_HWFILTER)
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                if (mask & IFCAP_VLAN_HWTSO)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                VLAN_CAPABILITIES(ifp);
                break;
        }
        case SIOCGI2C:
        {
                struct ixgbe_i2c_req    i2c;
                IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
                error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
                if (error)
                        break;
                if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
                        error = EINVAL;
                        break;
                }
                hw->phy.ops.read_i2c_byte(hw, i2c.offset,
                    i2c.dev_addr, i2c.data);
                error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
                break;
        }
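        /*
         * Illustrative userland use of SIOCGI2C (a sketch, not part of
         * the driver; assumes a socket fd "s" and the driver's
         * struct ixgbe_i2c_req definition, and uses only the fields
         * referenced by the handler above):
         *
         *      struct ixgbe_i2c_req i2c = { 0 };
         *      struct ifreq ifr = { 0 };
         *      i2c.dev_addr = 0xA0;    // SFP+ module EEPROM address
         *      i2c.offset = 0;
         *      strlcpy(ifr.ifr_name, "ix0", sizeof(ifr.ifr_name));
         *      ifr.ifr_data = (caddr_t)&i2c;
         *      ioctl(s, SIOCGI2C, &ifr);
         */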
        default:
                IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
#define IXGBE_MHADD_MFS_SHIFT 16

static void
ixgbe_init_locked(struct adapter *adapter)
{
        struct ifnet   *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        struct ixgbe_hw *hw = &adapter->hw;
        u32             k, txdctl, mhadd, gpie;
        u32             rxdctl, rxctrl;

        mtx_assert(&adapter->core_mtx, MA_OWNED);
        INIT_DEBUGOUT("ixgbe_init_locked: begin");
        hw->adapter_stopped = FALSE;
        ixgbe_stop_adapter(hw);
        callout_stop(&adapter->timer);

        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
              IXGBE_ETH_LENGTH_OF_ADDRESS);
        ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
        hw->addr_ctrl.rar_used_count = 1;

        /* Set the various hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (hw->mac.type != ixgbe_mac_82598EB)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        /* Prepare transmit descriptors and buffers */
        if (ixgbe_setup_transmit_structures(adapter)) {
                device_printf(dev, "Could not setup transmit structures\n");
                ixgbe_stop(adapter);
                return;
        }

        ixgbe_init_hw(hw);
        ixgbe_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        ixgbe_set_multi(adapter);

        /*
        ** Determine the correct mbuf pool
        ** for doing jumbo frames
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else if (adapter->max_frame_size <= 9216)
                adapter->rx_mbuf_sz = MJUM9BYTES;
        else
                adapter->rx_mbuf_sz = MJUM16BYTES;
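
        /*
         * Worked example for the pool selection above: with an MTU of
         * 9000, max_frame_size is 9000 + ETHER_HDR_LEN (14) +
         * ETHER_CRC_LEN (4) = 9018 bytes (see the SIOCSIFMTU handler),
         * which falls in the <= 9216 case, so 9k jumbo clusters
         * (MJUM9BYTES) are used for the receive buffers.
         */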

        /* Prepare receive descriptors and buffers */
        if (ixgbe_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                ixgbe_stop(adapter);
                return;
        }

        /* Configure RX settings */
        ixgbe_initialize_receive_units(adapter);

        gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

        /* Enable Fan Failure Interrupt */
        gpie |= IXGBE_SDP1_GPIEN;

        /* Add for Module detection */
        if (hw->mac.type == ixgbe_mac_82599EB)
                gpie |= IXGBE_SDP2_GPIEN;

        /* Thermal Failure Detection */
        if (hw->mac.type == ixgbe_mac_X540)
                gpie |= IXGBE_SDP0_GPIEN;

        if (adapter->msix > 1) {
                /* Enable Enhanced MSIX mode */
                gpie |= IXGBE_GPIE_MSIX_MODE;
                gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
                    IXGBE_GPIE_OCD;
        }
        IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

        /* Set MTU size */
        if (ifp->if_mtu > ETHERMTU) {
                mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
                mhadd &= ~IXGBE_MHADD_MFS_MASK;
                mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
                IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
        }

        /* Now enable all the queues */

        for (int i = 0; i < adapter->num_queues; i++) {
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                /* Set WTHRESH to 8, burst writeback */
                txdctl |= (8 << 16);
                /*
                 * When the internal queue falls below PTHRESH (32),
                 * start prefetching as long as there are at least
                 * HTHRESH (1) buffers ready. The values are taken
                 * from the Intel linux driver 3.8.21.
                 * Prefetching enables tx line rate even with 1 queue.
                 */
                txdctl |= (32 << 0) | (1 << 8);
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
        }
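
        /*
         * For reference (per the shifts used above): TXDCTL places
         * PTHRESH at bit 0, HTHRESH at bit 8 and WTHRESH at bit 16,
         * so the writes above yield PTHRESH = 32, HTHRESH = 1 and
         * WTHRESH = 8 for every transmit queue.
         */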

        for (int i = 0; i < adapter->num_queues; i++) {
                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
                if (hw->mac.type == ixgbe_mac_82598EB) {
                        /*
                        ** PTHRESH = 32 (0x20)
                        ** HTHRESH = 4
                        ** WTHRESH = 8
                        */
                        rxdctl &= ~0x3FFFFF;
                        rxdctl |= 0x080420;
                }
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
                for (k = 0; k < 10; k++) {
                        if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
                            IXGBE_RXDCTL_ENABLE)
                                break;
                        else
                                msec_delay(1);
                }
                wmb();
#ifdef DEV_NETMAP
                /*
                 * In netmap mode, we must preserve the buffers made
                 * available to userspace before the if_init()
                 * (this is true by default on the TX side, because
                 * init makes all buffers available to userspace).
                 *
                 * netmap_reset() and the device specific routines
                 * (e.g. ixgbe_setup_receive_rings()) map these
                 * buffers at the end of the NIC ring, so here we
                 * must set the RDT (tail) register to make sure
                 * they are not overwritten.
                 *
                 * In this driver the NIC ring starts at RDH = 0,
                 * RDT points to the last slot available for reception (?),
                 * so RDT = num_rx_desc - 1 means the whole ring is available.
                 */
                if (ifp->if_capenable & IFCAP_NETMAP) {
                        struct netmap_adapter *na = NA(adapter->ifp);
                        struct netmap_kring *kring = &na->rx_rings[i];
                        int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);

                        IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
                } else
#endif /* DEV_NETMAP */
                IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
        }

        /* Enable Receive engine */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
        if (hw->mac.type == ixgbe_mac_82598EB)
                rxctrl |= IXGBE_RXCTRL_DMBYPS;
        rxctrl |= IXGBE_RXCTRL_RXEN;
        ixgbe_enable_rx_dma(hw, rxctrl);

        callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

        /* Set up MSI/X routing */
        if (ixgbe_enable_msix) {
                ixgbe_configure_ivars(adapter);
                /* Set up auto-mask */
                if (hw->mac.type == ixgbe_mac_82598EB)
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
                else {
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
                }
        } else {  /* Simple settings for Legacy/MSI */
                ixgbe_set_ivar(adapter, 0, 0, 0);
                ixgbe_set_ivar(adapter, 0, 0, 1);
                IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
        }

#ifdef IXGBE_FDIR
        /* Init Flow director */
        if (hw->mac.type != ixgbe_mac_82598EB) {
                u32 hdrm = 32 << fdir_pballoc;

                hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
                ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
        }
#endif

        /*
        ** Check on any SFP devices that
        ** need to be kick-started
        */
        if (hw->phy.type == ixgbe_phy_none) {
                int err = hw->phy.ops.identify(hw);
                if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                        device_printf(dev,
                            "Unsupported SFP+ module type was detected.\n");
                        return;
                }
        }

        /* Set moderation on the Link interrupt */
        IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);

        /* Config/Enable Link */
        ixgbe_config_link(adapter);

        /* Hardware Packet Buffer & Flow Control setup */
        {
                u32 rxpb, frame, size, tmp;

                frame = adapter->max_frame_size;

                /* Calculate High Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_DV_X540(frame, frame);
                else
                        tmp = IXGBE_DV(frame, frame);
                size = IXGBE_BT2KB(tmp);
                rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
                hw->fc.high_water[0] = rxpb - size;

                /* Now calculate Low Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_LOW_DV_X540(frame);
                else
                        tmp = IXGBE_LOW_DV(frame);
                hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

                hw->fc.requested_mode = adapter->fc;
                hw->fc.pause_time = IXGBE_FC_PAUSE;
                hw->fc.send_xon = TRUE;
        }
        /* Initialize the FC settings */
        ixgbe_start_hw(hw);

        /* Set up VLAN support and filter */
        ixgbe_setup_vlan_hw_support(adapter);

        /* And now turn on interrupts */
        ixgbe_enable_intr(adapter);

        /* Now inform the stack we're ready */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;

        return;
}

static void
ixgbe_init(void *arg)
{
        struct adapter *adapter = arg;

        IXGBE_CORE_LOCK(adapter);
        ixgbe_init_locked(adapter);
        IXGBE_CORE_UNLOCK(adapter);
        return;
}


/*
**
** MSIX Interrupt Handlers and Tasklets
**
*/

1387 static inline void
1388 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1389 {
1390         struct ixgbe_hw *hw = &adapter->hw;
1391         u64     queue = (u64)(1 << vector);
1392         u32     mask;
1393
1394         if (hw->mac.type == ixgbe_mac_82598EB) {
1395                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1396                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1397         } else {
1398                 mask = (queue & 0xFFFFFFFF);
1399                 if (mask)
1400                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1401                 mask = (queue >> 32);
1402                 if (mask)
1403                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1404         }
1405 }
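/*
** Example (illustrative): on a non-82598 MAC with vector 35,
** queue = (u64)1 << 35, so the low half (queue & 0xFFFFFFFF) is
** zero and only EIMS_EX(1) is written, with bit 3 set
** (queue >> 32 == 0x8); vectors 0-31 land in EIMS_EX(0).
*/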
1406
1407 static inline void
1408 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1409 {
1410         struct ixgbe_hw *hw = &adapter->hw;
1411         u64     queue = ((u64)1 << vector);
1412         u32     mask;
1413
1414         if (hw->mac.type == ixgbe_mac_82598EB) {
1415                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1416                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1417         } else {
1418                 mask = (queue & 0xFFFFFFFF);
1419                 if (mask)
1420                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1421                 mask = (queue >> 32);
1422                 if (mask)
1423                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1424         }
1425 }
1426
1427 static void
1428 ixgbe_handle_que(void *context, int pending)
1429 {
1430         struct ix_queue *que = context;
1431         struct adapter  *adapter = que->adapter;
1432         struct tx_ring  *txr = que->txr;
1433         struct ifnet    *ifp = adapter->ifp;
1434         bool            more;
1435
1436         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1437                 more = ixgbe_rxeof(que);
1438                 IXGBE_TX_LOCK(txr);
1439                 ixgbe_txeof(txr);
1440 #ifndef IXGBE_LEGACY_TX
1441                 if (!drbr_empty(ifp, txr->br))
1442                         ixgbe_mq_start_locked(ifp, txr);
1443 #else
1444                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1445                         ixgbe_start_locked(txr, ifp);
1446 #endif
1447                 IXGBE_TX_UNLOCK(txr);
1448         }
1449
1450         /* Reenable this interrupt */
1451         if (que->res != NULL)
1452                 ixgbe_enable_queue(adapter, que->msix);
1453         else
1454                 ixgbe_enable_intr(adapter);
1455         return;
1456 }
1457
1458
1459 /*********************************************************************
1460  *
1461  *  Legacy Interrupt Service routine
1462  *
1463  **********************************************************************/
1464
1465 static void
1466 ixgbe_legacy_irq(void *arg)
1467 {
1468         struct ix_queue *que = arg;
1469         struct adapter  *adapter = que->adapter;
1470         struct ixgbe_hw *hw = &adapter->hw;
1471         struct ifnet    *ifp = adapter->ifp;
1472         struct          tx_ring *txr = adapter->tx_rings;
1473         bool            more;
1474         u32             reg_eicr;
1475
1476
1477         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1478
1479         ++que->irqs;
1480         if (reg_eicr == 0) {
1481                 ixgbe_enable_intr(adapter);
1482                 return;
1483         }
1484
1485         more = ixgbe_rxeof(que);
1486
1487         IXGBE_TX_LOCK(txr);
1488         ixgbe_txeof(txr);
1489 #ifdef IXGBE_LEGACY_TX
1490         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1491                 ixgbe_start_locked(txr, ifp);
1492 #else
1493         if (!drbr_empty(ifp, txr->br))
1494                 ixgbe_mq_start_locked(ifp, txr);
1495 #endif
1496         IXGBE_TX_UNLOCK(txr);
1497
1498         /* Check for fan failure */
1499         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1500             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1501                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1502                     "REPLACE IMMEDIATELY!!\n");
1503                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1504         }
1505
1506         /* Link status change */
1507         if (reg_eicr & IXGBE_EICR_LSC)
1508                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1509
1510         if (more)
1511                 taskqueue_enqueue(que->tq, &que->que_task);
1512         else
1513                 ixgbe_enable_intr(adapter);
1514         return;
1515 }
1516
1517
1518 /*********************************************************************
1519  *
1520  *  MSIX Queue Interrupt Service routine
1521  *
1522  **********************************************************************/
1523 void
1524 ixgbe_msix_que(void *arg)
1525 {
1526         struct ix_queue *que = arg;
1527         struct adapter  *adapter = que->adapter;
1528         struct ifnet    *ifp = adapter->ifp;
1529         struct tx_ring  *txr = que->txr;
1530         struct rx_ring  *rxr = que->rxr;
1531         bool            more;
1532         u32             newitr = 0;
1533
1534         /* Protect against spurious interrupts */
1535         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1536                 return;
1537
1538         ixgbe_disable_queue(adapter, que->msix);
1539         ++que->irqs;
1540
1541         more = ixgbe_rxeof(que);
1542
1543         IXGBE_TX_LOCK(txr);
1544         ixgbe_txeof(txr);
1545 #ifdef IXGBE_LEGACY_TX
1546                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1547                 ixgbe_start_locked(txr, ifp);
1548 #else
1549         if (!drbr_empty(ifp, txr->br))
1550                 ixgbe_mq_start_locked(ifp, txr);
1551 #endif
1552         IXGBE_TX_UNLOCK(txr);
1553
1554         /* Do AIM now? */
1555
1556         if (ixgbe_enable_aim == FALSE)
1557                 goto no_calc;
1558         /*
1559         ** Do Adaptive Interrupt Moderation:
1560         **  - Write out last calculated setting
1561         **  - Calculate based on average size over
1562         **    the last interval.
1563         */
1564         if (que->eitr_setting)
1565                 IXGBE_WRITE_REG(&adapter->hw,
1566                     IXGBE_EITR(que->msix), que->eitr_setting);
1567  
1568         que->eitr_setting = 0;
1569
1570         /* Idle, do nothing */
1571         if ((txr->bytes == 0) && (rxr->bytes == 0))
1572                 goto no_calc;
1573                                 
1574         if ((txr->bytes) && (txr->packets))
1575                 newitr = txr->bytes/txr->packets;
1576         if ((rxr->bytes) && (rxr->packets))
1577                 newitr = max(newitr,
1578                     (rxr->bytes / rxr->packets));
1579         newitr += 24; /* account for hardware frame, crc */
1580
1581         /* set an upper boundary */
1582         newitr = min(newitr, 3000);
1583
1584         /* Be nice to the mid range */
1585         if ((newitr > 300) && (newitr < 1200))
1586                 newitr = (newitr / 3);
1587         else
1588                 newitr = (newitr / 2);
1589
1590         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1591                 newitr |= newitr << 16;
1592         else
1593                 newitr |= IXGBE_EITR_CNT_WDIS;
1594                  
1595         /* save for next interrupt */
1596         que->eitr_setting = newitr;
1597
1598         /* Reset state */
1599         txr->bytes = 0;
1600         txr->packets = 0;
1601         rxr->bytes = 0;
1602         rxr->packets = 0;
1603
1604 no_calc:
1605         if (more)
1606                 taskqueue_enqueue(que->tq, &que->que_task);
1607         else
1608                 ixgbe_enable_queue(adapter, que->msix);
1609         return;
1610 }
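/*
** AIM example (illustrative, traffic numbers assumed): if the last
** interval moved 64000 RX bytes in 50 packets, the average is 1280
** bytes/packet; adding 24 for preamble/CRC gives 1304.  That is
** above the 300-1200 mid range, so the value written next time is
** 1304 / 2 = 652 (OR'd with IXGBE_EITR_CNT_WDIS on newer MACs).
*/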
1611
1612
1613 static void
1614 ixgbe_msix_link(void *arg)
1615 {
1616         struct adapter  *adapter = arg;
1617         struct ixgbe_hw *hw = &adapter->hw;
1618         u32             reg_eicr;
1619
1620         ++adapter->link_irq;
1621
1622         /* First get the cause */
1623         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1624         /* Be sure the queue bits are not cleared */
1625         reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1626         /* Clear interrupt with write */
1627         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
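        /*
        ** Example (illustrative): had the read returned 0x00100007,
        ** the low RTX_QUEUE bits (0x7) are masked off first, so only
        ** the LSC cause (0x00100000) is acknowledged here and the
        ** queue vectors still see their own interrupts.
        */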
1628
1629         /* Link status change */
1630         if (reg_eicr & IXGBE_EICR_LSC)
1631                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1632
1633         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1634 #ifdef IXGBE_FDIR
1635                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1636                         /* This is probably overkill :) */
1637                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1638                                 return;
1639                         /* Disable the interrupt */
1640                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1641                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1642                 } else
1643 #endif
1644                 if (reg_eicr & IXGBE_EICR_ECC) {
1645                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1646                             "Please Reboot!!\n");
1647                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1648                 } else
1649
1650                 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1651                         /* Clear the interrupt */
1652                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1653                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1654                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1655                         /* Clear the interrupt */
1656                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1657                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1658                 }
1659         } 
1660
1661         /* Check for fan failure */
1662         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1663             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1664                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1665                     "REPLACE IMMEDIATELY!!\n");
1666                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1667         }
1668
1669         /* Check for over temp condition */
1670         if ((hw->mac.type == ixgbe_mac_X540) &&
1671             (reg_eicr & IXGBE_EICR_TS)) {
1672                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1673                     "PHY IS SHUT DOWN!!\n");
1674                 device_printf(adapter->dev, "System shutdown required\n");
1675                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1676         }
1677
1678         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1679         return;
1680 }
1681
1682 /*********************************************************************
1683  *
1684  *  Media Ioctl callback
1685  *
1686  *  This routine is called whenever the user queries the status of
1687  *  the interface using ifconfig.
1688  *
1689  **********************************************************************/
1690 static void
1691 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1692 {
1693         struct adapter *adapter = ifp->if_softc;
1694
1695         INIT_DEBUGOUT("ixgbe_media_status: begin");
1696         IXGBE_CORE_LOCK(adapter);
1697         ixgbe_update_link_status(adapter);
1698
1699         ifmr->ifm_status = IFM_AVALID;
1700         ifmr->ifm_active = IFM_ETHER;
1701
1702         if (!adapter->link_active) {
1703                 IXGBE_CORE_UNLOCK(adapter);
1704                 return;
1705         }
1706
1707         ifmr->ifm_status |= IFM_ACTIVE;
1708
1709         switch (adapter->link_speed) {
1710                 case IXGBE_LINK_SPEED_100_FULL:
1711                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1712                         break;
1713                 case IXGBE_LINK_SPEED_1GB_FULL:
1714                         ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1715                         break;
1716                 case IXGBE_LINK_SPEED_10GB_FULL:
1717                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1718                         break;
1719         }
1720
1721         IXGBE_CORE_UNLOCK(adapter);
1722
1723         return;
1724 }
1725
1726 /*********************************************************************
1727  *
1728  *  Media Ioctl callback
1729  *
1730  *  This routine is called when the user changes speed/duplex using
1731  *  media/mediaopt option with ifconfig.
1732  *
1733  **********************************************************************/
1734 static int
1735 ixgbe_media_change(struct ifnet * ifp)
1736 {
1737         struct adapter *adapter = ifp->if_softc;
1738         struct ifmedia *ifm = &adapter->media;
1739
1740         INIT_DEBUGOUT("ixgbe_media_change: begin");
1741
1742         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1743                 return (EINVAL);
1744
1745         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1746         case IFM_AUTO:
1747                 adapter->hw.phy.autoneg_advertised =
1748                     IXGBE_LINK_SPEED_100_FULL |
1749                     IXGBE_LINK_SPEED_1GB_FULL |
1750                     IXGBE_LINK_SPEED_10GB_FULL;
1751                 break;
1752         default:
1753                 device_printf(adapter->dev, "Only auto media type\n");
1754                 return (EINVAL);
1755         }
1756
1757         return (0);
1758 }
1759
1760 /*********************************************************************
1761  *
1762  *  This routine maps the mbufs to tx descriptors, allowing the
1763  *  TX engine to transmit the packets. 
1764  *      - return 0 on success, positive on failure
1765  *
1766  **********************************************************************/
1767
1768 static int
1769 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1770 {
1771         struct adapter  *adapter = txr->adapter;
1772         u32             olinfo_status = 0, cmd_type_len;
1773         int             i, j, error, nsegs;
1774         int             first;
1775         bool            remap = TRUE;
1776         struct mbuf     *m_head;
1777         bus_dma_segment_t segs[adapter->num_segs];
1778         bus_dmamap_t    map;
1779         struct ixgbe_tx_buf *txbuf;
1780         union ixgbe_adv_tx_desc *txd = NULL;
1781
1782         m_head = *m_headp;
1783
1784         /* Basic descriptor defines */
1785         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1786             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1787
1788         if (m_head->m_flags & M_VLANTAG)
1789                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1790
1791         /*
1792          * Important to capture the first descriptor
1793          * used because it will contain the index of
1794          * the one we tell the hardware to report back
1795          */
1796         first = txr->next_avail_desc;
1797         txbuf = &txr->tx_buffers[first];
1798         map = txbuf->map;
1799
1800         /*
1801          * Map the packet for DMA.
1802          */
1803 retry:
1804         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1805             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1806
1807         if (__predict_false(error)) {
1808                 struct mbuf *m;
1809
1810                 switch (error) {
1811                 case EFBIG:
1812                         /* Try it again? - one try */
1813                         if (remap == TRUE) {
1814                                 remap = FALSE;
1815                                 m = m_defrag(*m_headp, M_NOWAIT);
1816                                 if (m == NULL) {
1817                                         adapter->mbuf_defrag_failed++;
1818                                         m_freem(*m_headp);
1819                                         *m_headp = NULL;
1820                                         return (ENOBUFS);
1821                                 }
1822                                 *m_headp = m;
1823                                 goto retry;
1824                         } else
1825                                 return (error);
1826                 case ENOMEM:
1827                         txr->no_tx_dma_setup++;
1828                         return (error);
1829                 default:
1830                         txr->no_tx_dma_setup++;
1831                         m_freem(*m_headp);
1832                         *m_headp = NULL;
1833                         return (error);
1834                 }
1835         }
1836
1837         /* Make certain there are enough descriptors */
1838         if (nsegs > txr->tx_avail - 2) {
1839                 txr->no_desc_avail++;
1840                 bus_dmamap_unload(txr->txtag, map);
1841                 return (ENOBUFS);
1842         }
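        /*
        ** Example (illustrative): a 4-segment mbuf chain needs 4 data
        ** descriptors plus the context descriptor consumed by
        ** ixgbe_tx_ctx_setup() below; our reading of the "- 2" above
        ** is that it reserves that slot plus one spare.
        */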
1843         m_head = *m_headp;
1844
1845         /*
1846         ** Set up the appropriate offload context;
1847         ** this will consume the first descriptor
1848         */
1849         error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1850         if (__predict_false(error)) {
1851                 if (error == ENOBUFS)
1852                         *m_headp = NULL;
1853                 return (error);
1854         }
1855
1856 #ifdef IXGBE_FDIR
1857         /* Do the flow director magic */
1858         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1859                 ++txr->atr_count;
1860                 if (txr->atr_count >= atr_sample_rate) {
1861                         ixgbe_atr(txr, m_head);
1862                         txr->atr_count = 0;
1863                 }
1864         }
1865 #endif
1866
1867         i = txr->next_avail_desc;
1868         for (j = 0; j < nsegs; j++) {
1869                 bus_size_t seglen;
1870                 bus_addr_t segaddr;
1871
1872                 txbuf = &txr->tx_buffers[i];
1873                 txd = &txr->tx_base[i];
1874                 seglen = segs[j].ds_len;
1875                 segaddr = htole64(segs[j].ds_addr);
1876
1877                 txd->read.buffer_addr = segaddr;
1878                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1879                     cmd_type_len | seglen);
1880                 txd->read.olinfo_status = htole32(olinfo_status);
1881
1882                 if (++i == txr->num_desc)
1883                         i = 0;
1884         }
1885
1886         txd->read.cmd_type_len |=
1887             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1888         txr->tx_avail -= nsegs;
1889         txr->next_avail_desc = i;
1890
1891         txbuf->m_head = m_head;
1892         /*
1893         ** Here we swap the map so the last descriptor,
1894         ** which gets the completion interrupt, has the
1895         ** real map, and the first descriptor gets the
1896         ** unused map from this descriptor.
1897         */
1898         txr->tx_buffers[first].map = txbuf->map;
1899         txbuf->map = map;
1900         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1901
1902         /* Set the EOP descriptor that will be marked done */
1903         txbuf = &txr->tx_buffers[first];
1904         txbuf->eop = txd;
1905
1906         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1907             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1908         /*
1909          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1910          * hardware that this frame is available to transmit.
1911          */
1912         ++txr->total_packets;
1913         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1914
1915         return (0);
1916
1917 }
1918
1919 static void
1920 ixgbe_set_promisc(struct adapter *adapter)
1921 {
1922         u_int32_t       reg_rctl;
1923         struct ifnet   *ifp = adapter->ifp;
1924         int             mcnt = 0;
1925
1926         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1927         reg_rctl &= (~IXGBE_FCTRL_UPE);
1928         if (ifp->if_flags & IFF_ALLMULTI)
1929                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1930         else {
1931                 struct  ifmultiaddr *ifma;
1932 #if __FreeBSD_version < 800000
1933                 IF_ADDR_LOCK(ifp);
1934 #else
1935                 if_maddr_rlock(ifp);
1936 #endif
1937                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1938                         if (ifma->ifma_addr->sa_family != AF_LINK)
1939                                 continue;
1940                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1941                                 break;
1942                         mcnt++;
1943                 }
1944 #if __FreeBSD_version < 800000
1945                 IF_ADDR_UNLOCK(ifp);
1946 #else
1947                 if_maddr_runlock(ifp);
1948 #endif
1949         }
1950         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1951                 reg_rctl &= (~IXGBE_FCTRL_MPE);
1952         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1953
1954         if (ifp->if_flags & IFF_PROMISC) {
1955                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1956                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1957         } else if (ifp->if_flags & IFF_ALLMULTI) {
1958                 reg_rctl |= IXGBE_FCTRL_MPE;
1959                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1960                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1961         }
1962         return;
1963 }
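/*
** FCTRL recap (from the 82598/82599 datasheets): UPE is unicast
** promiscuous, MPE multicast promiscuous; with both bits clear the
** MAC accepts only frames matching the RAR entries or the multicast
** table programmed by ixgbe_set_multi() below.
*/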
1964
1965
1966 /*********************************************************************
1967  *  Multicast Update
1968  *
1969  *  This routine is called whenever multicast address list is updated.
1970  *
1971  **********************************************************************/
1972 #define IXGBE_RAR_ENTRIES 16
1973
1974 static void
1975 ixgbe_set_multi(struct adapter *adapter)
1976 {
1977         u32     fctrl;
1978         u8      *mta;
1979         u8      *update_ptr;
1980         struct  ifmultiaddr *ifma;
1981         int     mcnt = 0;
1982         struct ifnet   *ifp = adapter->ifp;
1983
1984         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1985
1986         mta = adapter->mta;
1987         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1988             MAX_NUM_MULTICAST_ADDRESSES);
1989
1990 #if __FreeBSD_version < 800000
1991         IF_ADDR_LOCK(ifp);
1992 #else
1993         if_maddr_rlock(ifp);
1994 #endif
1995         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1996                 if (ifma->ifma_addr->sa_family != AF_LINK)
1997                         continue;
1998                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1999                         break;
2000                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
2001                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
2002                     IXGBE_ETH_LENGTH_OF_ADDRESS);
2003                 mcnt++;
2004         }
2005 #if __FreeBSD_version < 800000
2006         IF_ADDR_UNLOCK(ifp);
2007 #else
2008         if_maddr_runlock(ifp);
2009 #endif
2010
2011         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2013         if (ifp->if_flags & IFF_PROMISC)
2014                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2015         else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
2016             ifp->if_flags & IFF_ALLMULTI) {
2017                 fctrl |= IXGBE_FCTRL_MPE;
2018                 fctrl &= ~IXGBE_FCTRL_UPE;
2019         } else
2020                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2021         
2022         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2023
2024         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2025                 update_ptr = mta;
2026                 ixgbe_update_mc_addr_list(&adapter->hw,
2027                     update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2028         }
2029
2030         return;
2031 }
2032
2033 /*
2034  * This is an iterator function needed by the multicast
2035  * shared code. It simply feeds the shared code routine the
2036  * addresses in ixgbe_set_multi()'s array, one by one.
2037  */
2038 static u8 *
2039 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2040 {
2041         u8 *addr = *update_ptr;
2042         u8 *newptr;
2043         *vmdq = 0;
2044
2045         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2046         *update_ptr = newptr;
2047         return addr;
2048 }
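/*
** Example (illustrative): with mcnt == 2 the mta array holds two
** back-to-back 6-byte addresses; the first call returns &mta[0] and
** advances *update_ptr to &mta[6], the second returns &mta[6], and
** ixgbe_update_mc_addr_list() stops after mcnt iterations.
*/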
2049
2050
2051 /*********************************************************************
2052  *  Timer routine
2053  *
2054  *  This routine checks for link status, updates statistics,
2055  *  and runs the watchdog check.
2056  *
2057  **********************************************************************/
2058
2059 static void
2060 ixgbe_local_timer(void *arg)
2061 {
2062         struct adapter  *adapter = arg;
2063         device_t        dev = adapter->dev;
2064         struct ix_queue *que = adapter->queues;
2065         struct tx_ring  *txr = adapter->tx_rings;
2066         int             hung = 0, paused = 0;
2067
2068         mtx_assert(&adapter->core_mtx, MA_OWNED);
2069
2070         /* Check for pluggable optics */
2071         if (adapter->sfp_probe)
2072                 if (!ixgbe_sfp_probe(adapter))
2073                         goto out; /* Nothing to do */
2074
2075         ixgbe_update_link_status(adapter);
2076         ixgbe_update_stats_counters(adapter);
2077
2078         /*
2079          * If the interface has been paused
2080          * then don't do the watchdog check
2081          */
2082         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2083                 paused = 1;
2084
2085         /*
2086         ** Check the TX queues' status
2087         **      - watchdog only if all queues show hung
2088         */          
2089         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2090                 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2091                     (paused == 0))
2092                         ++hung;
2093                 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2094                         taskqueue_enqueue(que->tq, &txr->txq_task);
2095         }
2096         /* Only truly watchdog if all queues show hung */
2097         if (hung == adapter->num_queues)
2098                 goto watchdog;
2099
2100 out:
2101         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2102         return;
2103
2104 watchdog:
2105         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2106         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2107             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2108             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2109         device_printf(dev,"TX(%d) desc avail = %d, "
2110             "Next TX to Clean = %d\n",
2111             txr->me, txr->tx_avail, txr->next_to_clean);
2112         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2113         adapter->watchdog_events++;
2114         ixgbe_init_locked(adapter);
2115 }
2116
2117 /*
2118 ** Note: this routine updates the OS on the link state;
2119 **      the real check of the hardware only happens with
2120 **      a link interrupt.
2121 */
2122 static void
2123 ixgbe_update_link_status(struct adapter *adapter)
2124 {
2125         struct ifnet    *ifp = adapter->ifp;
2126         device_t dev = adapter->dev;
2127
2128
2129         if (adapter->link_up){ 
2130                 if (adapter->link_active == FALSE) {
2131                         if (bootverbose)
2132                                 device_printf(dev,"Link is up %d Gbps %s\n",
2133                                     ((adapter->link_speed == 128)? 10:1),
2134                                     "Full Duplex");
2135                         adapter->link_active = TRUE;
2136                         /* Update any Flow Control changes */
2137                         ixgbe_fc_enable(&adapter->hw);
2138                         if_link_state_change(ifp, LINK_STATE_UP);
2139                 }
2140         } else { /* Link down */
2141                 if (adapter->link_active == TRUE) {
2142                         if (bootverbose)
2143                                 device_printf(dev,"Link is Down\n");
2144                         if_link_state_change(ifp, LINK_STATE_DOWN);
2145                         adapter->link_active = FALSE;
2146                 }
2147         }
2148
2149         return;
2150 }
2151
2152
2153 /*********************************************************************
2154  *
2155  *  This routine disables all traffic on the adapter by issuing a
2156  *  global reset on the MAC and deallocates TX/RX buffers.
2157  *
2158  **********************************************************************/
2159
2160 static void
2161 ixgbe_stop(void *arg)
2162 {
2163         struct ifnet   *ifp;
2164         struct adapter *adapter = arg;
2165         struct ixgbe_hw *hw = &adapter->hw;
2166         ifp = adapter->ifp;
2167
2168         mtx_assert(&adapter->core_mtx, MA_OWNED);
2169
2170         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2171         ixgbe_disable_intr(adapter);
2172         callout_stop(&adapter->timer);
2173
2174         /* Let the stack know...*/
2175         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2176
2177         ixgbe_reset_hw(hw);
2178         hw->adapter_stopped = FALSE;
2179         ixgbe_stop_adapter(hw);
2180         if (hw->mac.type == ixgbe_mac_82599EB)
2181                 ixgbe_stop_mac_link_on_d3_82599(hw);
2182         /* Turn off the laser - noop with no optics */
2183         ixgbe_disable_tx_laser(hw);
2184
2185         /* Update the stack */
2186         adapter->link_up = FALSE;
2187         ixgbe_update_link_status(adapter);
2188
2189         /* reprogram the RAR[0] in case user changed it. */
2190         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2191
2192         return;
2193 }
2194
2195
2196 /*********************************************************************
2197  *
2198  *  Determine hardware revision.
2199  *
2200  **********************************************************************/
2201 static void
2202 ixgbe_identify_hardware(struct adapter *adapter)
2203 {
2204         device_t        dev = adapter->dev;
2205         struct ixgbe_hw *hw = &adapter->hw;
2206
2207         /* Save off the information about this board */
2208         hw->vendor_id = pci_get_vendor(dev);
2209         hw->device_id = pci_get_device(dev);
2210         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2211         hw->subsystem_vendor_id =
2212             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2213         hw->subsystem_device_id =
2214             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2215
2216         /* We need this here to set the num_segs below */
2217         ixgbe_set_mac_type(hw);
2218
2219         /* Pick up the 82599 and VF settings */
2220         if (hw->mac.type != ixgbe_mac_82598EB) {
2221                 hw->phy.smart_speed = ixgbe_smart_speed;
2222                 adapter->num_segs = IXGBE_82599_SCATTER;
2223         } else
2224                 adapter->num_segs = IXGBE_82598_SCATTER;
2225
2226         return;
2227 }
2228
2229 /*********************************************************************
2230  *
2231  *  Determine optic type
2232  *
2233  **********************************************************************/
2234 static void
2235 ixgbe_setup_optics(struct adapter *adapter)
2236 {
2237         struct ixgbe_hw *hw = &adapter->hw;
2238         int             layer;
2239
2240         layer = ixgbe_get_supported_physical_layer(hw);
2241
2242         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2243                 adapter->optics = IFM_10G_T;
2244                 return;
2245         }
2246
2247         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2248                 adapter->optics = IFM_1000_T;
2249                 return;
2250         }
2251
2252         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2253                 adapter->optics = IFM_1000_SX;
2254                 return;
2255         }
2256
2257         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2258             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2259                 adapter->optics = IFM_10G_LR;
2260                 return;
2261         }
2262
2263         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2264                 adapter->optics = IFM_10G_SR;
2265                 return;
2266         }
2267
2268         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2269                 adapter->optics = IFM_10G_TWINAX;
2270                 return;
2271         }
2272
2273         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2274             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2275                 adapter->optics = IFM_10G_CX4;
2276                 return;
2277         }
2278
2279         /* If we get here just set the default */
2280         adapter->optics = IFM_ETHER | IFM_AUTO;
2281         return;
2282 }
2283
2284 /*********************************************************************
2285  *
2286  *  Setup the Legacy or MSI Interrupt handler
2287  *
2288  **********************************************************************/
2289 static int
2290 ixgbe_allocate_legacy(struct adapter *adapter)
2291 {
2292         device_t        dev = adapter->dev;
2293         struct          ix_queue *que = adapter->queues;
2294 #ifndef IXGBE_LEGACY_TX
2295         struct tx_ring          *txr = adapter->tx_rings;
2296 #endif
2297         int             error, rid = 0;
2298
2299         /* MSI RID at 1 */
2300         if (adapter->msix == 1)
2301                 rid = 1;
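        /*
        ** (On FreeBSD a legacy INTx line is allocated at rid 0,
        ** while MSI vectors start at rid 1, hence the adjustment.)
        */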
2302
2303         /* We allocate a single interrupt resource */
2304         adapter->res = bus_alloc_resource_any(dev,
2305             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2306         if (adapter->res == NULL) {
2307                 device_printf(dev, "Unable to allocate bus resource: "
2308                     "interrupt\n");
2309                 return (ENXIO);
2310         }
2311
2312         /*
2313          * Try allocating a fast interrupt and the associated deferred
2314          * processing contexts.
2315          */
2316 #ifndef IXGBE_LEGACY_TX
2317         TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2318 #endif
2319         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2320         que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2321             taskqueue_thread_enqueue, &que->tq);
2322         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
2323             device_get_nameunit(adapter->dev));
2324
2325         /* Tasklets for Link, SFP and Multispeed Fiber */
2326         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2327         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2328         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2329 #ifdef IXGBE_FDIR
2330         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2331 #endif
2332         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2333             taskqueue_thread_enqueue, &adapter->tq);
2334         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2335             device_get_nameunit(adapter->dev));
2336
2337         if ((error = bus_setup_intr(dev, adapter->res,
2338             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
2339             que, &adapter->tag)) != 0) {
2340                 device_printf(dev, "Failed to register fast interrupt "
2341                     "handler: %d\n", error);
2342                 taskqueue_free(que->tq);
2343                 taskqueue_free(adapter->tq);
2344                 que->tq = NULL;
2345                 adapter->tq = NULL;
2346                 return (error);
2347         }
2348         /* For simplicity in the handlers */
2349         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2350
2351         return (0);
2352 }
2353
2354
2355 /*********************************************************************
2356  *
2357  *  Setup MSIX Interrupt resources and handlers 
2358  *
2359  **********************************************************************/
2360 static int
2361 ixgbe_allocate_msix(struct adapter *adapter)
2362 {
2363         device_t        dev = adapter->dev;
2364         struct          ix_queue *que = adapter->queues;
2365         struct          tx_ring *txr = adapter->tx_rings;
2366         int             error, rid, vector = 0;
2367         int             cpu_id = 0;
2368
2369 #ifdef  RSS
2370         /*
2371          * If we're doing RSS, the number of queues needs to
2372          * match the number of RSS buckets that are configured.
2373          *
2374          * + If there's more queues than RSS buckets, we'll end
2375          *   up with queues that get no traffic.
2376          *
2377          * + If there's more RSS buckets than queues, we'll end
2378          *   up having multiple RSS buckets map to the same queue,
2379          *   so there'll be some contention.
2380          */
2381         if (adapter->num_queues != rss_getnumbuckets()) {
2382                 device_printf(dev,
2383                     "%s: number of queues (%d) != number of RSS buckets (%d)"
2384                     "; performance will be impacted.\n",
2385                     __func__,
2386                     adapter->num_queues,
2387                     rss_getnumbuckets());
2388         }
2389 #endif
2390
2391
2392
2393         for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2394                 rid = vector + 1;
2395                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2396                     RF_SHAREABLE | RF_ACTIVE);
2397                 if (que->res == NULL) {
2398                         device_printf(dev,"Unable to allocate"
2399                             " bus resource: que interrupt [%d]\n", vector);
2400                         return (ENXIO);
2401                 }
2402                 /* Set the handler function */
2403                 error = bus_setup_intr(dev, que->res,
2404                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2405                     ixgbe_msix_que, que, &que->tag);
2406                 if (error) {
2407                         que->res = NULL;
2408                         device_printf(dev, "Failed to register QUE handler");
2409                         return (error);
2410                 }
2411 #if __FreeBSD_version >= 800504
2412                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2413 #endif
2414                 que->msix = vector;
2415                 adapter->que_mask |= ((u64)1 << que->msix);
2416 #ifdef  RSS
2417                 /*
2418                  * The queue ID is used as the RSS layer bucket ID.
2419                  * We look up the queue ID -> RSS CPU ID and select
2420                  * that.
2421                  */
2422                 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2423 #else
2424                 /*
2425                  * Bind the msix vector, and thus the
2426                  * rings to the corresponding cpu.
2427                  *
2428                  * This just happens to match the default RSS round-robin
2429                  * bucket -> queue -> CPU allocation.
2430                  */
2431                 if (adapter->num_queues > 1)
2432                         cpu_id = i;
2433 #endif
2434                 if (adapter->num_queues > 1)
2435                         bus_bind_intr(dev, que->res, cpu_id);
2436
2437 #ifdef  RSS
2438                 device_printf(dev,
2439                     "Bound RSS bucket %d to CPU %d\n",
2440                     i, cpu_id);
2441 #else
2442                 device_printf(dev,
2443                     "Bound queue %d to cpu %d\n",
2444                     i, cpu_id);
2445 #endif
2446
2447
2448 #ifndef IXGBE_LEGACY_TX
2449                 TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2450 #endif
2451                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2452                 que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2453                     taskqueue_thread_enqueue, &que->tq);
2454 #ifdef  RSS
2455                 taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
2456                     cpu_id,
2457                     "%s (bucket %d)",
2458                     device_get_nameunit(adapter->dev),
2459                     cpu_id);
2460 #else
2461                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2462                     device_get_nameunit(adapter->dev));
2463 #endif
2464         }
2465
2466         /* and Link */
2467         rid = vector + 1;
2468         adapter->res = bus_alloc_resource_any(dev,
2469             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2470         if (!adapter->res) {
2471                 device_printf(dev,"Unable to allocate"
2472                     " bus resource: Link interrupt [%d]\n", rid);
2473                 return (ENXIO);
2474         }
2475         /* Set the link handler function */
2476         error = bus_setup_intr(dev, adapter->res,
2477             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2478             ixgbe_msix_link, adapter, &adapter->tag);
2479         if (error) {
2480                 adapter->res = NULL;
2481                 device_printf(dev, "Failed to register LINK handler");
2482                 return (error);
2483         }
2484 #if __FreeBSD_version >= 800504
2485         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2486 #endif
2487         adapter->linkvec = vector;
2488         /* Tasklets for Link, SFP and Multispeed Fiber */
2489         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2490         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2491         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2492 #ifdef IXGBE_FDIR
2493         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2494 #endif
2495         adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2496             taskqueue_thread_enqueue, &adapter->tq);
2497         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2498             device_get_nameunit(adapter->dev));
2499
2500         return (0);
2501 }
2502
2503 /*
2504  * Set up either MSI/X or MSI
2505  */
2506 static int
2507 ixgbe_setup_msix(struct adapter *adapter)
2508 {
2509         device_t dev = adapter->dev;
2510         int rid, want, queues, msgs;
2511
2512         /* Override by tuneable */
2513         if (ixgbe_enable_msix == 0)
2514                 goto msi;
2515
2516         /* First try MSI/X */
2517         msgs = pci_msix_count(dev); 
2518         if (msgs == 0)
2519                 goto msi;
2520         rid = PCIR_BAR(MSIX_82598_BAR);
2521         adapter->msix_mem = bus_alloc_resource_any(dev,
2522             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2523         if (adapter->msix_mem == NULL) {
2524                 rid += 4;       /* 82599 maps in higher BAR */
2525                 adapter->msix_mem = bus_alloc_resource_any(dev,
2526                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2527         }
2528         if (adapter->msix_mem == NULL) {
2529                 /* May not be enabled */
2530                 device_printf(adapter->dev,
2531                     "Unable to map MSIX table\n");
2532                 goto msi;
2533         }
2534
2535         /* Figure out a reasonable auto config value */
2536         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2537 #ifdef  RSS
2538         /* If we're doing RSS, clamp at the number of RSS buckets */
2539         if (queues > rss_getnumbuckets())
2540                 queues = rss_getnumbuckets();
2541 #endif
2542
2543         if (ixgbe_num_queues != 0)
2544                 queues = ixgbe_num_queues;
2545         /* Set max queues to 8 when autoconfiguring */
2546         else if ((ixgbe_num_queues == 0) && (queues > 8))
2547                 queues = 8;
2548
2549         /* reflect correct sysctl value */
2550         ixgbe_num_queues = queues;
2551
2552         /*
2553         ** Want one vector (RX/TX pair) per queue
2554         ** plus an additional for Link.
2555         */
2556         want = queues + 1;
2557         if (msgs >= want)
2558                 msgs = want;
2559         else {
2560                 device_printf(adapter->dev,
2561                     "MSIX Configuration Problem, "
2562                     "%d vectors but %d queues wanted!\n",
2563                     msgs, want);
2564                 goto msi;
2565         }
2566         if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2567                 device_printf(adapter->dev,
2568                     "Using MSIX interrupts with %d vectors\n", msgs);
2569                 adapter->num_queues = queues;
2570                 return (msgs);
2571         }
2572         /*
2573         ** If MSIX alloc failed or provided us with
2574         ** less than needed, free and fall through to MSI
2575         */
2576         pci_release_msi(dev);
2577
2578 msi:
2579         if (adapter->msix_mem != NULL) {
2580                 bus_release_resource(dev, SYS_RES_MEMORY,
2581                     rid, adapter->msix_mem);
2582                 adapter->msix_mem = NULL;
2583         }
2584         msgs = 1;
2585         if (pci_alloc_msi(dev, &msgs) == 0) {
2586                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2587                 return (msgs);
2588         }
2589         device_printf(adapter->dev,"Using a Legacy interrupt\n");
2590         return (0);
2591 }
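/*
** Example (illustrative): on an 8-CPU machine whose device exposes
** 16 MSIX messages, the auto config picks queues = min(8, 15) = 8,
** want = 9, and pci_alloc_msix() is asked for 9 vectors: one per
** RX/TX queue pair plus one for the link interrupt.
*/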
2592
2593
2594 static int
2595 ixgbe_allocate_pci_resources(struct adapter *adapter)
2596 {
2597         int             rid;
2598         device_t        dev = adapter->dev;
2599
2600         rid = PCIR_BAR(0);
2601         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2602             &rid, RF_ACTIVE);
2603
2604         if (!(adapter->pci_mem)) {
2605                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2606                 return (ENXIO);
2607         }
2608
2609         adapter->osdep.mem_bus_space_tag =
2610                 rman_get_bustag(adapter->pci_mem);
2611         adapter->osdep.mem_bus_space_handle =
2612                 rman_get_bushandle(adapter->pci_mem);
2613         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2614
2615         /* Legacy defaults */
2616         adapter->num_queues = 1;
2617         adapter->hw.back = &adapter->osdep;
2618
2619         /*
2620         ** Now set up MSI or MSI/X; this should
2621         ** return the number of supported
2622         ** vectors. (Will be 1 for MSI)
2623         */
2624         adapter->msix = ixgbe_setup_msix(adapter);
2625         return (0);
2626 }
2627
2628 static void
2629 ixgbe_free_pci_resources(struct adapter * adapter)
2630 {
2631         struct          ix_queue *que = adapter->queues;
2632         device_t        dev = adapter->dev;
2633         int             rid, memrid;
2634
2635         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2636                 memrid = PCIR_BAR(MSIX_82598_BAR);
2637         else
2638                 memrid = PCIR_BAR(MSIX_82599_BAR);
2639
2640         /*
2641         ** There is a slight possibility of a failure mode
2642         ** in attach that will result in entering this function
2643         ** before interrupt resources have been initialized, and
2644         ** in that case we do not want to execute the loops below.
2645         ** We can detect this reliably by the state of the adapter
2646         ** res pointer.
2647         */
2648         if (adapter->res == NULL)
2649                 goto mem;
2650
2651         /*
2652         **  Release all msix queue resources:
2653         */
2654         for (int i = 0; i < adapter->num_queues; i++, que++) {
2655                 rid = que->msix + 1;
2656                 if (que->tag != NULL) {
2657                         bus_teardown_intr(dev, que->res, que->tag);
2658                         que->tag = NULL;
2659                 }
2660                 if (que->res != NULL)
2661                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2662         }
2663
2664
2665         /* Clean the Legacy or Link interrupt last */
2666         if (adapter->linkvec) /* we are doing MSIX */
2667                 rid = adapter->linkvec + 1;
2668         else
2669                 rid = (adapter->msix != 0) ? 1 : 0;
2670
2671         if (adapter->tag != NULL) {
2672                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2673                 adapter->tag = NULL;
2674         }
2675         if (adapter->res != NULL)
2676                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2677
2678 mem:
2679         if (adapter->msix)
2680                 pci_release_msi(dev);
2681
2682         if (adapter->msix_mem != NULL)
2683                 bus_release_resource(dev, SYS_RES_MEMORY,
2684                     memrid, adapter->msix_mem);
2685
2686         if (adapter->pci_mem != NULL)
2687                 bus_release_resource(dev, SYS_RES_MEMORY,
2688                     PCIR_BAR(0), adapter->pci_mem);
2689
2690         return;
2691 }
2692
2693 /*********************************************************************
2694  *
2695  *  Setup networking device structure and register an interface.
2696  *
2697  **********************************************************************/
2698 static int
2699 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2700 {
2701         struct ixgbe_hw *hw = &adapter->hw;
2702         struct ifnet   *ifp;
2703
2704         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2705
2706         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2707         if (ifp == NULL) {
2708                 device_printf(dev, "can not allocate ifnet structure\n");
2709                 return (-1);
2710         }
2711         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2712         ifp->if_baudrate = IF_Gbps(10);
2713         ifp->if_init = ixgbe_init;
2714         ifp->if_softc = adapter;
2715         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2716         ifp->if_ioctl = ixgbe_ioctl;
2717 #ifndef IXGBE_LEGACY_TX
2718         ifp->if_transmit = ixgbe_mq_start;
2719         ifp->if_qflush = ixgbe_qflush;
2720 #else
2721         ifp->if_start = ixgbe_start;
2722         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2723         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2724         IFQ_SET_READY(&ifp->if_snd);
2725 #endif
2726
2727         ether_ifattach(ifp, adapter->hw.mac.addr);
2728
2729         adapter->max_frame_size =
2730             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2731
2732         /*
2733          * Tell the upper layer(s) we support long frames.
2734          */
2735         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2736
2737         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2738         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2739         ifp->if_capabilities |= IFCAP_LRO;
2740         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2741                              |  IFCAP_VLAN_HWTSO
2742                              |  IFCAP_VLAN_MTU
2743                              |  IFCAP_HWSTATS;
2744         ifp->if_capenable = ifp->if_capabilities;
2745
2746         /*
2747         ** Don't turn this on by default: if vlans are
2748         ** created on another pseudo device (e.g. lagg),
2749         ** vlan events are not passed through, breaking
2750         ** operation, but with HW FILTER off it works. If
2751         ** you use vlans directly on the ixgbe driver you
2752         ** can enable this and get full hardware tag filtering.
2753         */
2754         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2755
2756         /*
2757          * Specify the media types supported by this adapter and register
2758          * callbacks to update media and link information
2759          */
2760         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2761                      ixgbe_media_status);
2762         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2763         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2764         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2765                 ifmedia_add(&adapter->media,
2766                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2767                 ifmedia_add(&adapter->media,
2768                     IFM_ETHER | IFM_1000_T, 0, NULL);
2769         }
2770         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2771         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2772
2773         return (0);
2774 }
2775
2776 static void
2777 ixgbe_config_link(struct adapter *adapter)
2778 {
2779         struct ixgbe_hw *hw = &adapter->hw;
2780         u32     autoneg, err = 0;
2781         bool    sfp, negotiate;
2782
2783         sfp = ixgbe_is_sfp(hw);
2784
2785         if (sfp) { 
2786                 if (hw->phy.multispeed_fiber) {
2787                         hw->mac.ops.setup_sfp(hw);
2788                         ixgbe_enable_tx_laser(hw);
2789                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2790                 } else
2791                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2792         } else {
2793                 if (hw->mac.ops.check_link)
2794                         err = ixgbe_check_link(hw, &adapter->link_speed,
2795                             &adapter->link_up, FALSE);
2796                 if (err)
2797                         goto out;
2798                 autoneg = hw->phy.autoneg_advertised;
2799                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2800                         err  = hw->mac.ops.get_link_capabilities(hw,
2801                             &autoneg, &negotiate);
2802                 if (err)
2803                         goto out;
2804                 if (hw->mac.ops.setup_link)
2805                         err = hw->mac.ops.setup_link(hw,
2806                             autoneg, adapter->link_up);
2807         }
2808 out:
2809         return;
2810 }
2811
2812 /********************************************************************
2813  * Manage DMA'able memory.
2814  *******************************************************************/
2815 static void
2816 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2817 {
2818         if (error)
2819                 return;
2820         *(bus_addr_t *) arg = segs->ds_addr;
2821         return;
2822 }
2823
2824 static int
2825 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2826                 struct ixgbe_dma_alloc *dma, int mapflags)
2827 {
2828         device_t dev = adapter->dev;
2829         int             r;
2830
2831         r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),   /* parent */
2832                                DBA_ALIGN, 0,    /* alignment, bounds */
2833                                BUS_SPACE_MAXADDR,       /* lowaddr */
2834                                BUS_SPACE_MAXADDR,       /* highaddr */
2835                                NULL, NULL,      /* filter, filterarg */
2836                                size,    /* maxsize */
2837                                1,       /* nsegments */
2838                                size,    /* maxsegsize */
2839                                BUS_DMA_ALLOCNOW,        /* flags */
2840                                NULL,    /* lockfunc */
2841                                NULL,    /* lockfuncarg */
2842                                &dma->dma_tag);
2843         if (r != 0) {
2844                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2845                        "error %u\n", r);
2846                 goto fail_0;
2847         }
2848         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2849                              BUS_DMA_NOWAIT, &dma->dma_map);
2850         if (r != 0) {
2851                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2852                        "error %u\n", r);
2853                 goto fail_1;
2854         }
2855         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2856                             size,
2857                             ixgbe_dmamap_cb,
2858                             &dma->dma_paddr,
2859                             mapflags | BUS_DMA_NOWAIT);
2860         if (r != 0) {
2861                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2862                        "error %u\n", r);
2863                 goto fail_2;
2864         }
2865         dma->dma_size = size;
2866         return (0);
2867 fail_2:
2868         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2869 fail_1:
2870         bus_dma_tag_destroy(dma->dma_tag);
2871 fail_0:
2872         dma->dma_tag = NULL;
2873         return (r);
2874 }
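/*
** Usage sketch (mirrors ixgbe_allocate_queues() below): size the
** ring, round to the descriptor alignment, and let the helper do
** the tag/alloc/load sequence in one call:
**
**	tsize = roundup2(adapter->num_tx_desc *
**	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
**	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
**	    BUS_DMA_NOWAIT))
**		error = ENOMEM;		/* caller unwinds */
*/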
2875
2876 static void
2877 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2878 {
2879         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2880             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2881         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2882         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2883         bus_dma_tag_destroy(dma->dma_tag);
2884 }
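     /*
      * Usage sketch (illustrative only; 'size' is a placeholder):
      * callers pair these helpers as
      *
      *     struct ixgbe_dma_alloc dma;
      *     if (ixgbe_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) == 0) {
      *             ... use dma.dma_vaddr / dma.dma_paddr ...
      *             ixgbe_dma_free(adapter, &dma);
      *     }
      */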
2885
2886
2887 /*********************************************************************
2888  *
2889  *  Allocate memory for the transmit and receive rings, and then
2890  *  the descriptors associated with each. This is called only once at attach.
2891  *
2892  **********************************************************************/
2893 static int
2894 ixgbe_allocate_queues(struct adapter *adapter)
2895 {
2896         device_t        dev = adapter->dev;
2897         struct ix_queue *que;
2898         struct tx_ring  *txr;
2899         struct rx_ring  *rxr;
2900         int rsize, tsize, error = IXGBE_SUCCESS;
2901         int txconf = 0, rxconf = 0;
2902
2903         /* First allocate the top level queue structs */
2904         if (!(adapter->queues =
2905             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2906             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2907                 device_printf(dev, "Unable to allocate queue memory\n");
2908                 error = ENOMEM;
2909                 goto fail;
2910         }
2911
2912         /* First allocate the TX ring struct memory */
2913         if (!(adapter->tx_rings =
2914             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2915             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2916                 device_printf(dev, "Unable to allocate TX ring memory\n");
2917                 error = ENOMEM;
2918                 goto tx_fail;
2919         }
2920
2921         /* Next allocate the RX */
2922         if (!(adapter->rx_rings =
2923             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2924             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2925                 device_printf(dev, "Unable to allocate RX ring memory\n");
2926                 error = ENOMEM;
2927                 goto rx_fail;
2928         }
2929
2930         /* For the ring itself */
2931         tsize = roundup2(adapter->num_tx_desc *
2932             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
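             /*
              * roundup2() assumes DBA_ALIGN is a power of two; the ring
              * must begin on that boundary, matching the alignment given
              * to bus_dma_tag_create() in ixgbe_dma_malloc().
              */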
2933
2934         /*
2935          * Now set up the TX queues, txconf is needed to handle the
2936          * possibility that things fail midcourse and we need to
2937          * undo memory gracefully
2938          */ 
2939         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2940                 /* Set up some basics */
2941                 txr = &adapter->tx_rings[i];
2942                 txr->adapter = adapter;
2943                 txr->me = i;
2944                 txr->num_desc = adapter->num_tx_desc;
2945
2946                 /* Initialize the TX side lock */
2947                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2948                     device_get_nameunit(dev), txr->me);
2949                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2950
2951                 if (ixgbe_dma_malloc(adapter, tsize,
2952                         &txr->txdma, BUS_DMA_NOWAIT)) {
2953                         device_printf(dev,
2954                             "Unable to allocate TX Descriptor memory\n");
2955                         error = ENOMEM;
2956                         goto err_tx_desc;
2957                 }
2958                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2959                 bzero((void *)txr->tx_base, tsize);
2960
2961                 /* Now allocate transmit buffers for the ring */
2962                 if (ixgbe_allocate_transmit_buffers(txr)) {
2963                         device_printf(dev,
2964                             "Critical Failure setting up transmit buffers\n");
2965                         error = ENOMEM;
2966                         goto err_tx_desc;
2967                 }
2968 #ifndef IXGBE_LEGACY_TX
2969                 /* Allocate a buf ring */
2970                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2971                     M_WAITOK, &txr->tx_mtx);
2972                 if (txr->br == NULL) {
2973                         device_printf(dev,
2974                             "Critical Failure setting up buf ring\n");
2975                         error = ENOMEM;
2976                         goto err_tx_desc;
2977                 }
2978 #endif
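                     /*
                      * NB: txr->br exists only when IXGBE_LEGACY_TX is
                      * not defined, so the matching buf_ring_free() in
                      * ixgbe_free_transmit_buffers() must sit under the
                      * same #ifndef conditional.
                      */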
2979         }
2980
2981         /*
2982          * Next the RX queues...
2983          */ 
2984         rsize = roundup2(adapter->num_rx_desc *
2985             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2986         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2987                 rxr = &adapter->rx_rings[i];
2988                 /* Set up some basics */
2989                 rxr->adapter = adapter;
2990                 rxr->me = i;
2991                 rxr->num_desc = adapter->num_rx_desc;
2992
2993                 /* Initialize the RX side lock */
2994                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2995                     device_get_nameunit(dev), rxr->me);
2996                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2997
2998                 if (ixgbe_dma_malloc(adapter, rsize,
2999                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3000                         device_printf(dev,
3001                             "Unable to allocate RX Descriptor memory\n");
3002                         error = ENOMEM;
3003                         goto err_rx_desc;
3004                 }
3005                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3006                 bzero((void *)rxr->rx_base, rsize);
3007
3008                 /* Allocate receive buffers for the ring */
3009                 if (ixgbe_allocate_receive_buffers(rxr)) {
3010                         device_printf(dev,
3011                             "Critical Failure setting up receive buffers\n");
3012                         error = ENOMEM;
3013                         goto err_rx_desc;
3014                 }
3015         }
3016
3017         /*
3018         ** Finally set up the queue holding structs
3019         */
3020         for (int i = 0; i < adapter->num_queues; i++) {
3021                 que = &adapter->queues[i];
3022                 que->adapter = adapter;
3023                 que->txr = &adapter->tx_rings[i];
3024                 que->rxr = &adapter->rx_rings[i];
3025         }
3026
3027         return (0);
3028
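     /*
      * Unwind in reverse order of setup: rxconf/txconf count the rings
      * that completed initialization, so only those have descriptor
      * DMA memory to release before the top-level arrays are freed.
      */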
3029 err_rx_desc:
3030         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3031                 ixgbe_dma_free(adapter, &rxr->rxdma);
3032 err_tx_desc:
3033         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3034                 ixgbe_dma_free(adapter, &txr->txdma);
3035         free(adapter->rx_rings, M_DEVBUF);
3036 rx_fail:
3037         free(adapter->tx_rings, M_DEVBUF);
3038 tx_fail:
3039         free(adapter->queues, M_DEVBUF);
3040 fail:
3041         return (error);
3042 }
3043
3044 /*********************************************************************
3045  *
3046  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3047  *  the information needed to transmit a packet on the wire. This is
3048  *  called only once at attach; setup is done on every reset.
3049  *
3050  **********************************************************************/
3051 static int
3052 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
3053 {
3054         struct adapter *adapter = txr->adapter;
3055         device_t dev = adapter->dev;
3056         struct ixgbe_tx_buf *txbuf;
3057         int error, i;
3058
3059         /*
3060          * Setup DMA descriptor areas.
3061          */
3062         if ((error = bus_dma_tag_create(
3063                                bus_get_dma_tag(adapter->dev),   /* parent */
3064                                1, 0,            /* alignment, bounds */
3065                                BUS_SPACE_MAXADDR,       /* lowaddr */
3066                                BUS_SPACE_MAXADDR,       /* highaddr */
3067                                NULL, NULL,              /* filter, filterarg */
3068                                IXGBE_TSO_SIZE,          /* maxsize */
3069                                adapter->num_segs,       /* nsegments */
3070                                PAGE_SIZE,               /* maxsegsize */
3071                                0,                       /* flags */
3072                                NULL,                    /* lockfunc */
3073                                NULL,                    /* lockfuncarg */
3074                                &txr->txtag))) {
3075                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3076                 goto fail;
3077         }
3078
3079         if (!(txr->tx_buffers =
3080             (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3081             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3082                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3083                 error = ENOMEM;
3084                 goto fail;
3085         }
3086
3087         /* Create the descriptor buffer dma maps */
3088         txbuf = txr->tx_buffers;
3089         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3090                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3091                 if (error != 0) {
3092                         device_printf(dev, "Unable to create TX DMA map\n");
3093                         goto fail;
3094                 }
3095         }
3096
3097         return (0);
3098 fail:
3099         /* We free everything; this handles the case where setup failed partway through */
3100         ixgbe_free_transmit_structures(adapter);
3101         return (error);
3102 }
3103
3104 /*********************************************************************
3105  *
3106  *  Initialize a transmit ring.
3107  *
3108  **********************************************************************/
3109 static void
3110 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3111 {
3112         struct adapter *adapter = txr->adapter;
3113         struct ixgbe_tx_buf *txbuf;
3114         int i;
3115 #ifdef DEV_NETMAP
3116         struct netmap_adapter *na = NA(adapter->ifp);
3117         struct netmap_slot *slot;
3118 #endif /* DEV_NETMAP */
3119
3120         /* Clear the old ring contents */
3121         IXGBE_TX_LOCK(txr);
3122 #ifdef DEV_NETMAP
3123         /*
3124          * (under lock): if in netmap mode, do some consistency
3125          * checks and set slot to entry 0 of the netmap ring.
3126          */
3127         slot = netmap_reset(na, NR_TX, txr->me, 0);
3128 #endif /* DEV_NETMAP */
3129         bzero((void *)txr->tx_base,
3130               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3131         /* Reset indices */
3132         txr->next_avail_desc = 0;
3133         txr->next_to_clean = 0;
3134
3135         /* Free any existing tx buffers. */
3136         txbuf = txr->tx_buffers;
3137         for (i = 0; i < txr->num_desc; i++, txbuf++) {
3138                 if (txbuf->m_head != NULL) {
3139                         bus_dmamap_sync(txr->txtag, txbuf->map,
3140                             BUS_DMASYNC_POSTWRITE);
3141                         bus_dmamap_unload(txr->txtag, txbuf->map);
3142                         m_freem(txbuf->m_head);
3143                         txbuf->m_head = NULL;
3144                 }
3145 #ifdef DEV_NETMAP
3146                 /*
3147                  * In netmap mode, set the map for the packet buffer.
3148                  * NOTE: Some drivers (not this one) also need to set
3149                  * the physical buffer address in the NIC ring.
3150                  * Slots in the netmap ring (indexed by "si") are
3151                  * kring->nkr_hwofs positions "ahead" wrt the
3152                  * corresponding slot in the NIC ring. In some drivers
3153                  * (not here) nkr_hwofs can be negative. Function
3154                  * netmap_idx_n2k() handles wraparounds properly.
3155                  */
3156                 if (slot) {
3157                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3158                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3159                 }
3160 #endif /* DEV_NETMAP */
3161                 /* Clear the EOP descriptor pointer */
3162                 txbuf->eop = NULL;
3163         }
3164
3165 #ifdef IXGBE_FDIR
3166         /* Set the rate at which we sample packets */
3167         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3168                 txr->atr_sample = atr_sample_rate;
3169 #endif
3170
3171         /* Set number of descriptors available */
3172         txr->tx_avail = adapter->num_tx_desc;
3173
3174         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3175             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3176         IXGBE_TX_UNLOCK(txr);
3177 }
3178
3179 /*********************************************************************
3180  *
3181  *  Initialize all transmit rings.
3182  *
3183  **********************************************************************/
3184 static int
3185 ixgbe_setup_transmit_structures(struct adapter *adapter)
3186 {
3187         struct tx_ring *txr = adapter->tx_rings;
3188
3189         for (int i = 0; i < adapter->num_queues; i++, txr++)
3190                 ixgbe_setup_transmit_ring(txr);
3191
3192         return (0);
3193 }
3194
3195 /*********************************************************************
3196  *
3197  *  Enable transmit unit.
3198  *
3199  **********************************************************************/
3200 static void
3201 ixgbe_initialize_transmit_units(struct adapter *adapter)
3202 {
3203         struct tx_ring  *txr = adapter->tx_rings;
3204         struct ixgbe_hw *hw = &adapter->hw;
3205
3206         /* Setup the Base and Length of the Tx Descriptor Ring */
3207
3208         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3209                 u64     tdba = txr->txdma.dma_paddr;
3210                 u32     txctrl;
3211
3212                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3213                        (tdba & 0x00000000ffffffffULL));
3214                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3215                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3216                     adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3217
3218                 /* Setup the HW Tx Head and Tail descriptor pointers */
3219                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3220                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3221
3222                 /* Setup Transmit Descriptor Cmd Settings */
3223                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3224                 txr->queue_status = IXGBE_QUEUE_IDLE;
3225
3226                 /* Set the processing limit */
3227                 txr->process_limit = ixgbe_tx_process_limit;
3228
3229                 /* Disable Head Writeback */
3230                 switch (hw->mac.type) {
3231                 case ixgbe_mac_82598EB:
3232                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3233                         break;
3234                 case ixgbe_mac_82599EB:
3235                 case ixgbe_mac_X540:
3236                 default:
3237                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3238                         break;
3239                 }
3240                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3241                 switch (hw->mac.type) {
3242                 case ixgbe_mac_82598EB:
3243                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3244                         break;
3245                 case ixgbe_mac_82599EB:
3246                 case ixgbe_mac_X540:
3247                 default:
3248                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3249                         break;
3250                 }
3251
3252         }
3253
3254         if (hw->mac.type != ixgbe_mac_82598EB) {
3255                 u32 dmatxctl, rttdcs;
3256                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3257                 dmatxctl |= IXGBE_DMATXCTL_TE;
3258                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3259                 /* Disable arbiter to set MTQC */
3260                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3261                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3262                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3263                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3264                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3265                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3266         }
3267
3268         return;
3269 }
3270
3271 /*********************************************************************
3272  *
3273  *  Free all transmit rings.
3274  *
3275  **********************************************************************/
3276 static void
3277 ixgbe_free_transmit_structures(struct adapter *adapter)
3278 {
3279         struct tx_ring *txr = adapter->tx_rings;
3280
3281         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3282                 IXGBE_TX_LOCK(txr);
3283                 ixgbe_free_transmit_buffers(txr);
3284                 ixgbe_dma_free(adapter, &txr->txdma);
3285                 IXGBE_TX_UNLOCK(txr);
3286                 IXGBE_TX_LOCK_DESTROY(txr);
3287         }
3288         free(adapter->tx_rings, M_DEVBUF);
3289 }
3290
3291 /*********************************************************************
3292  *
3293  *  Free transmit ring related data structures.
3294  *
3295  **********************************************************************/
3296 static void
3297 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3298 {
3299         struct adapter *adapter = txr->adapter;
3300         struct ixgbe_tx_buf *tx_buffer;
3301         int             i;
3302
3303         INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
3304
3305         if (txr->tx_buffers == NULL)
3306                 return;
3307
3308         tx_buffer = txr->tx_buffers;
3309         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3310                 if (tx_buffer->m_head != NULL) {
3311                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3312                             BUS_DMASYNC_POSTWRITE);
3313                         bus_dmamap_unload(txr->txtag,
3314                             tx_buffer->map);
3315                         m_freem(tx_buffer->m_head);
3316                         tx_buffer->m_head = NULL;
3317                         if (tx_buffer->map != NULL) {
3318                                 bus_dmamap_destroy(txr->txtag,
3319                                     tx_buffer->map);
3320                                 tx_buffer->map = NULL;
3321                         }
3322                 } else if (tx_buffer->map != NULL) {
3323                         bus_dmamap_unload(txr->txtag,
3324                             tx_buffer->map);
3325                         bus_dmamap_destroy(txr->txtag,
3326                             tx_buffer->map);
3327                         tx_buffer->map = NULL;
3328                 }
3329         }
3330 #ifndef IXGBE_LEGACY_TX
3331         if (txr->br != NULL)
3332                 buf_ring_free(txr->br, M_DEVBUF);
3333 #endif
3334         if (txr->tx_buffers != NULL) {
3335                 free(txr->tx_buffers, M_DEVBUF);
3336                 txr->tx_buffers = NULL;
3337         }
3338         if (txr->txtag != NULL) {
3339                 bus_dma_tag_destroy(txr->txtag);
3340                 txr->txtag = NULL;
3341         }
3342         return;
3343 }
3344
3345 /*********************************************************************
3346  *
3347  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3348  *
3349  **********************************************************************/
3350
3351 static int
3352 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3353     u32 *cmd_type_len, u32 *olinfo_status)
3354 {
3355         struct ixgbe_adv_tx_context_desc *TXD;
3356         struct ether_vlan_header *eh;
3357         struct ip *ip;
3358         struct ip6_hdr *ip6;
3359         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3360         int     ehdrlen, ip_hlen = 0;
3361         u16     etype;
3362         u8      ipproto = 0;
3363         int     offload = TRUE;
3364         int     ctxd = txr->next_avail_desc;
3365         u16     vtag = 0;
3366
3367         /* First check if TSO is to be used */
3368         if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3369                 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3370
3371         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3372                 offload = FALSE;
3373
3374         /* Indicate the whole packet as payload when not doing TSO */
3375         *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3376
3377         /* Now ready a context descriptor */
3378         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3379
3380         /*
3381         ** In advanced descriptors the vlan tag must 
3382         ** be placed into the context descriptor. Hence
3383         ** we need to make one even if not doing offloads.
3384         */
3385         if (mp->m_flags & M_VLANTAG) {
3386                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3387                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3388         } else if (offload == FALSE) /* ... no offload to do */
3389                 return (0);
3390
3391         /*
3392          * Determine where frame payload starts.
3393          * Jump over vlan headers if already present,
3394          * helpful for QinQ too.
3395          */
3396         eh = mtod(mp, struct ether_vlan_header *);
3397         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3398                 etype = ntohs(eh->evl_proto);
3399                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3400         } else {
3401                 etype = ntohs(eh->evl_encap_proto);
3402                 ehdrlen = ETHER_HDR_LEN;
3403         }
3404
3405         /* Set the ether header length */
3406         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3407
3408         switch (etype) {
3409                 case ETHERTYPE_IP:
3410                         ip = (struct ip *)(mp->m_data + ehdrlen);
3411                         ip_hlen = ip->ip_hl << 2;
3412                         ipproto = ip->ip_p;
3413                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3414                         break;
3415                 case ETHERTYPE_IPV6:
3416                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3417                         ip_hlen = sizeof(struct ip6_hdr);
3418                         /* XXX-BZ this will go badly in case of ext hdrs. */
3419                         ipproto = ip6->ip6_nxt;
3420                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3421                         break;
3422                 default:
3423                         offload = FALSE;
3424                         break;
3425         }
3426
3427         vlan_macip_lens |= ip_hlen;
3428         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3429
3430         switch (ipproto) {
3431                 case IPPROTO_TCP:
3432                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3433                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3434                         break;
3435
3436                 case IPPROTO_UDP:
3437                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3438                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3439                         break;
3440
3441 #if __FreeBSD_version >= 800000
3442                 case IPPROTO_SCTP:
3443                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3444                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3445                         break;
3446 #endif
3447                 default:
3448                         offload = FALSE;
3449                         break;
3450         }
3451
3452         if (offload) /* For the TX descriptor setup */
3453                 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3454
3455         /* Now copy bits into descriptor */
3456         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3457         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3458         TXD->seqnum_seed = htole32(0);
3459         TXD->mss_l4len_idx = htole32(0);
3460
3461         /* We've consumed the first desc, adjust counters */
3462         if (++ctxd == txr->num_desc)
3463                 ctxd = 0;
3464         txr->next_avail_desc = ctxd;
3465         --txr->tx_avail;
3466
3467         return (0);
3468 }
3469
3470 /**********************************************************************
3471  *
3472  *  Setup work for hardware segmentation offload (TSO) on
3473  *  adapters using advanced tx descriptors
3474  *
3475  **********************************************************************/
3476 static int
3477 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3478     u32 *cmd_type_len, u32 *olinfo_status)
3479 {
3480         struct ixgbe_adv_tx_context_desc *TXD;
3481         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3482         u32 mss_l4len_idx = 0, paylen;
3483         u16 vtag = 0, eh_type;
3484         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3485         struct ether_vlan_header *eh;
3486 #ifdef INET6
3487         struct ip6_hdr *ip6;
3488 #endif
3489 #ifdef INET
3490         struct ip *ip;
3491 #endif
3492         struct tcphdr *th;
3493
3494
3495         /*
3496          * Determine where frame payload starts.
3497          * Jump over vlan headers if already present
3498          */
3499         eh = mtod(mp, struct ether_vlan_header *);
3500         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3501                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3502                 eh_type = eh->evl_proto;
3503         } else {
3504                 ehdrlen = ETHER_HDR_LEN;
3505                 eh_type = eh->evl_encap_proto;
3506         }
3507
3508         switch (ntohs(eh_type)) {
3509 #ifdef INET6
3510         case ETHERTYPE_IPV6:
3511                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3512                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3513                 if (ip6->ip6_nxt != IPPROTO_TCP)
3514                         return (ENXIO);
3515                 ip_hlen = sizeof(struct ip6_hdr);
3517                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3518                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3519                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3520                 break;
3521 #endif
3522 #ifdef INET
3523         case ETHERTYPE_IP:
3524                 ip = (struct ip *)(mp->m_data + ehdrlen);
3525                 if (ip->ip_p != IPPROTO_TCP)
3526                         return (ENXIO);
3527                 ip->ip_sum = 0;
3528                 ip_hlen = ip->ip_hl << 2;
3529                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3530                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3531                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3532                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3533                 /* Tell transmit desc to also do IPv4 checksum. */
3534                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3535                 break;
3536 #endif
3537         default:
3538                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3539                     __func__, ntohs(eh_type));
3540                 break;
3541         }
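             /*
              * Note: the th_sum assignments above seed the TCP checksum
              * field with the pseudo-header sum (computed without the
              * length), which the hardware expects to find there before
              * it segments the TSO payload.
              */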
3542
3543         ctxd = txr->next_avail_desc;
3544         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3545
3546         tcp_hlen = th->th_off << 2;
3547
3548         /* This is used in the transmit desc in encap */
3549         paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3550
3551         /* VLAN MACLEN IPLEN */
3552         if (mp->m_flags & M_VLANTAG) {
3553                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3554                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3555         }
3556
3557         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3558         vlan_macip_lens |= ip_hlen;
3559         TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3560
3561         /* ADV DTYPE TUCMD */
3562         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3563         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3564         TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3565
3566         /* MSS L4LEN IDX */
3567         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3568         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3569         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3570
3571         TXD->seqnum_seed = htole32(0);
3572
3573         if (++ctxd == txr->num_desc)
3574                 ctxd = 0;
3575
3576         txr->tx_avail--;
3577         txr->next_avail_desc = ctxd;
3578         *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3579         *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3580         *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3581         ++txr->tso_tx;
3582         return (0);
3583 }
3584
3585 #ifdef IXGBE_FDIR
3586 /*
3587 ** This routine parses packet headers so that Flow
3588 ** Director can make a hashed filter table entry
3589 ** allowing traffic flows to be identified and kept
3590 ** on the same cpu.  Doing this for every packet
3591 ** would be a performance hit, so we only sample
3592 ** one in every IXGBE_FDIR_RATE packets.
3593 */
3594 static void
3595 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3596 {
3597         struct adapter                  *adapter = txr->adapter;
3598         struct ix_queue                 *que;
3599         struct ip                       *ip;
3600         struct tcphdr                   *th;
3601         struct udphdr                   *uh;
3602         struct ether_vlan_header        *eh;
3603         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3604         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3605         int                             ehdrlen, ip_hlen;
3606         u16                             etype;
3607
3608         eh = mtod(mp, struct ether_vlan_header *);
3609         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3610                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3611                 etype = eh->evl_proto;
3612         } else {
3613                 ehdrlen = ETHER_HDR_LEN;
3614                 etype = eh->evl_encap_proto;
3615         }
3616
3617         /* Only handling IPv4 */
3618         if (etype != htons(ETHERTYPE_IP))
3619                 return;
3620
3621         ip = (struct ip *)(mp->m_data + ehdrlen);
3622         ip_hlen = ip->ip_hl << 2;
3623
3624         /* check if we're UDP or TCP */
3625         switch (ip->ip_p) {
3626         case IPPROTO_TCP:
3627                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3628                 /* src and dst are inverted */
3629                 common.port.dst ^= th->th_sport;
3630                 common.port.src ^= th->th_dport;
3631                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3632                 break;
3633         case IPPROTO_UDP:
3634                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3635                 /* src and dst are inverted */
3636                 common.port.dst ^= uh->uh_sport;
3637                 common.port.src ^= uh->uh_dport;
3638                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3639                 break;
3640         default:
3641                 return;
3642         }
3643
3644         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3645         if (mp->m_pkthdr.ether_vtag)
3646                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3647         else
3648                 common.flex_bytes ^= etype;
3649         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3650
3651         que = &adapter->queues[txr->me];
3652         /*
3653         ** This assumes the Rx queue and Tx
3654         ** queue are bound to the same CPU
3655         */
3656         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3657             input, common, que->msix);
3658 }
3659 #endif /* IXGBE_FDIR */
3660
3661 /**********************************************************************
3662  *
3663  *  Examine each tx_buffer in the used queue. If the hardware is done
3664  *  processing the packet then free associated resources. The
3665  *  tx_buffer is put back on the free queue.
3666  *
3667  **********************************************************************/
3668 static void
3669 ixgbe_txeof(struct tx_ring *txr)
3670 {
3671 #ifdef DEV_NETMAP
3672         struct adapter          *adapter = txr->adapter;
3673         struct ifnet            *ifp = adapter->ifp;
3674 #endif
3675         u32                     work, processed = 0;
3676         u16                     limit = txr->process_limit;
3677         struct ixgbe_tx_buf     *buf;
3678         union ixgbe_adv_tx_desc *txd;
3679
3680         mtx_assert(&txr->tx_mtx, MA_OWNED);
3681
3682 #ifdef DEV_NETMAP
3683         if (ifp->if_capenable & IFCAP_NETMAP) {
3684                 struct netmap_adapter *na = NA(ifp);
3685                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3686                 txd = txr->tx_base;
3687                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3688                     BUS_DMASYNC_POSTREAD);
3689                 /*
3690                  * In netmap mode, all the work is done in the context
3691                  * of the client thread. Interrupt handlers only wake up
3692                  * clients, which may be sleeping on individual rings
3693                  * or on a global resource for all rings.
3694                  * To implement tx interrupt mitigation, we wake up the client
3695                  * thread roughly every half ring, even if the NIC interrupts
3696                  * more frequently. This is implemented as follows:
3697                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3698                  *   the slot that should wake up the thread (nkr_num_slots
3699                  *   means the user thread should not be woken up);
3700                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3701                  *   or the slot has the DD bit set.
3702                  */
3703                 if (!netmap_mitigate ||
3704                     (kring->nr_kflags < kring->nkr_num_slots &&
3705                     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3706                         netmap_tx_irq(ifp, txr->me);
3707                 }
3708                 return;
3709         }
3710 #endif /* DEV_NETMAP */
3711
3712         if (txr->tx_avail == txr->num_desc) {
3713                 txr->queue_status = IXGBE_QUEUE_IDLE;
3714                 return;
3715         }
3716
3717         /* Get work starting point */
3718         work = txr->next_to_clean;
3719         buf = &txr->tx_buffers[work];
3720         txd = &txr->tx_base[work];
3721         work -= txr->num_desc; /* The distance to ring end */
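             /*
              * 'work' is now a negative offset from the end of the ring:
              * it reaches zero exactly when the cursors step past the
              * last descriptor, so the wrap test below is simply '!work'.
              */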
3722         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3723             BUS_DMASYNC_POSTREAD);
3724
3725         do {
3726                 union ixgbe_adv_tx_desc *eop = buf->eop;
3727                 if (eop == NULL) /* No work */
3728                         break;
3729
3730                 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3731                         break;  /* I/O not complete */
3732
3733                 if (buf->m_head) {
3734                         txr->bytes +=
3735                             buf->m_head->m_pkthdr.len;
3736                         bus_dmamap_sync(txr->txtag,
3737                             buf->map,
3738                             BUS_DMASYNC_POSTWRITE);
3739                         bus_dmamap_unload(txr->txtag,
3740                             buf->map);
3741                         m_freem(buf->m_head);
3742                         buf->m_head = NULL;
3743                         buf->map = NULL;
3744                 }
3745                 buf->eop = NULL;
3746                 ++txr->tx_avail;
3747
3748                 /* We clean the range if multi segment */
3749                 while (txd != eop) {
3750                         ++txd;
3751                         ++buf;
3752                         ++work;
3753                         /* wrap the ring? */
3754                         if (__predict_false(!work)) {
3755                                 work -= txr->num_desc;
3756                                 buf = txr->tx_buffers;
3757                                 txd = txr->tx_base;
3758                         }
3759                         if (buf->m_head) {
3760                                 txr->bytes +=
3761                                     buf->m_head->m_pkthdr.len;
3762                                 bus_dmamap_sync(txr->txtag,
3763                                     buf->map,
3764                                     BUS_DMASYNC_POSTWRITE);
3765                                 bus_dmamap_unload(txr->txtag,
3766                                     buf->map);
3767                                 m_freem(buf->m_head);
3768                                 buf->m_head = NULL;
3769                                 buf->map = NULL;
3770                         }
3771                         ++txr->tx_avail;
3772                         buf->eop = NULL;
3773
3774                 }
3775                 ++txr->packets;
3776                 ++processed;
3777                 txr->watchdog_time = ticks;
3778
3779                 /* Try the next packet */
3780                 ++txd;
3781                 ++buf;
3782                 ++work;
3783                 /* reset with a wrap */
3784                 if (__predict_false(!work)) {
3785                         work -= txr->num_desc;
3786                         buf = txr->tx_buffers;
3787                         txd = txr->tx_base;
3788                 }
3789                 prefetch(txd);
3790         } while (__predict_true(--limit));
3791
3792         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3793             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3794
3795         work += txr->num_desc;
3796         txr->next_to_clean = work;
3797
3798         /*
3799         ** Watchdog calculation: we know there's work
3800         ** outstanding, or the first return above would
3801         ** have been taken; no progress for too long
3802         ** indicates a hang.
3803         */
3804         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3805                 txr->queue_status = IXGBE_QUEUE_HUNG;
3806
3807         if (txr->tx_avail == txr->num_desc)
3808                 txr->queue_status = IXGBE_QUEUE_IDLE;
3809
3810         return;
3811 }
3812
3813 /*********************************************************************
3814  *
3815  *  Refresh mbuf buffers for RX descriptor rings
3816  *   - keeps its own state so discards due to resource
3817  *     exhaustion are unnecessary: if an mbuf cannot be
3818  *     obtained the routine just returns, keeping its
3819  *     placeholder, and can simply be called again to retry.
3820  *
3821  **********************************************************************/
3822 static void
3823 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3824 {
3825         struct adapter          *adapter = rxr->adapter;
3826         bus_dma_segment_t       seg[1];
3827         struct ixgbe_rx_buf     *rxbuf;
3828         struct mbuf             *mp;
3829         int                     i, j, nsegs, error;
3830         bool                    refreshed = FALSE;
3831
3832         i = j = rxr->next_to_refresh;
3833         /* Control the loop with one beyond */
3834         if (++j == rxr->num_desc)
3835                 j = 0;
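             /*
              * 'i' is the slot being refreshed; 'j' runs one ahead so
              * the loop can stop exactly at 'limit' while next_to_refresh
              * is left pointing at the last descriptor given to hardware.
              */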
3836
3837         while (j != limit) {
3838                 rxbuf = &rxr->rx_buffers[i];
3839                 if (rxbuf->buf == NULL) {
3840                         mp = m_getjcl(M_NOWAIT, MT_DATA,
3841                             M_PKTHDR, rxr->mbuf_sz);
3842                         if (mp == NULL)
3843                                 goto update;
3844                         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3845                                 m_adj(mp, ETHER_ALIGN);
3846                 } else
3847                         mp = rxbuf->buf;
3848
3849                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
3850
3851                 /* If we're dealing with an mbuf that was copied rather
3852                  * than replaced, there's no need to go through busdma.
3853                  */
3854                 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
3855                         /* Get the memory mapping */
3856                         error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3857                             rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
3858                         if (error != 0) {
3859                                 printf("Refresh mbufs: payload dmamap load"
3860                                     " failure - %d\n", error);
3861                                 m_free(mp);
3862                                 rxbuf->buf = NULL;
3863                                 goto update;
3864                         }
3865                         rxbuf->buf = mp;
3866                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3867                             BUS_DMASYNC_PREREAD);
3868                         rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
3869                             htole64(seg[0].ds_addr);
3870                 } else {
3871                         rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
3872                         rxbuf->flags &= ~IXGBE_RX_COPY;
3873                 }
3874
3875                 refreshed = TRUE;
3876                 /* Next is precalculated */
3877                 i = j;
3878                 rxr->next_to_refresh = i;
3879                 if (++j == rxr->num_desc)
3880                         j = 0;
3881         }
3882 update:
3883         if (refreshed) /* Update hardware tail index */
3884                 IXGBE_WRITE_REG(&adapter->hw,
3885                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3886         return;
3887 }
3888
3889 /*********************************************************************
3890  *
3891  *  Allocate memory for rx_buffer structures. Since we use one
3892  *  rx_buffer per received packet, the maximum number of rx_buffer's
3893  *  that we'll need is equal to the number of receive descriptors
3894  *  that we've allocated.
3895  *
3896  **********************************************************************/
3897 static int
3898 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3899 {
3900         struct  adapter         *adapter = rxr->adapter;
3901         device_t                dev = adapter->dev;
3902         struct ixgbe_rx_buf     *rxbuf;
3903         int                     i, bsize, error;
3904
3905         bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
3906         if (!(rxr->rx_buffers =
3907             (struct ixgbe_rx_buf *) malloc(bsize,
3908             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3909                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3910                 error = ENOMEM;
3911                 goto fail;
3912         }
3913
3914         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),   /* parent */
3915                                    1, 0,        /* alignment, bounds */
3916                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3917                                    BUS_SPACE_MAXADDR,   /* highaddr */
3918                                    NULL, NULL,          /* filter, filterarg */
3919                                    MJUM16BYTES,         /* maxsize */
3920                                    1,                   /* nsegments */
3921                                    MJUM16BYTES,         /* maxsegsize */
3922                                    0,                   /* flags */
3923                                    NULL,                /* lockfunc */
3924                                    NULL,                /* lockfuncarg */
3925                                    &rxr->ptag))) {
3926                 device_printf(dev, "Unable to create RX DMA tag\n");
3927                 goto fail;
3928         }
3929
3930         for (i = 0; i < rxr->num_desc; i++) {
3931                 rxbuf = &rxr->rx_buffers[i];
3932                 error = bus_dmamap_create(rxr->ptag,
3933                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3934                 if (error) {
3935                         device_printf(dev, "Unable to create RX dma map\n");
3936                         goto fail;
3937                 }
3938         }
3939
3940         return (0);
3941
3942 fail:
3943         /* Frees all, but can handle partial completion */
3944         ixgbe_free_receive_structures(adapter);
3945         return (error);
3946 }
3947
3948 /*
3949 ** Used to detect a descriptor that has
3950 ** been merged by Hardware RSC.
3951 */
3952 static inline u32
3953 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3954 {
3955         return (le32toh(rx->wb.lower.lo_dword.data) &
3956             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3957 }
3958
3959 /*********************************************************************
3960  *
3961  *  Initialize Hardware RSC (LRO) feature on 82599
3962  *  for an RX ring, this is toggled by the LRO capability
3963  *  even though it is transparent to the stack.
3964  *
3965  *  NOTE: since this HW feature only works with IPv4, and
3966  *        our testing has shown soft LRO to be as effective,
3967  *        I have decided to disable it by default.
3968  *
3969  **********************************************************************/
3970 static void
3971 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3972 {
3973         struct  adapter         *adapter = rxr->adapter;
3974         struct  ixgbe_hw        *hw = &adapter->hw;
3975         u32                     rscctrl, rdrxctl;
3976
3977         /* If turning LRO/RSC off we need to disable it */
3978         if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
3979                 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3980                 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
                     /* Write the cleared enable bit back, else the disable is a no-op */
                     IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3981                 return;
3982         }
3983
3984         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3985         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3986 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3987         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3988 #endif /* DEV_NETMAP */
3989         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3990         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3991         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3992
3993         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3994         rscctrl |= IXGBE_RSCCTL_RSCEN;
3995         /*
3996         ** Limit the total number of descriptors that
3997         ** can be combined, so it does not exceed 64K
3998         */
3999         if (rxr->mbuf_sz == MCLBYTES)
4000                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
4001         else if (rxr->mbuf_sz == MJUMPAGESIZE)
4002                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
4003         else if (rxr->mbuf_sz == MJUM9BYTES)
4004                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
4005         else  /* Using 16K cluster */
4006                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
4007
4008         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4009
4010         /* Enable TCP header recognition */
4011         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
4012             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
4013             IXGBE_PSRTYPE_TCPHDR));
4014
4015         /* Disable RSC for ACK packets */
4016         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
4017             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
4018
4019         rxr->hw_rsc = TRUE;
4020 }
4021
4022
4023 static void     
4024 ixgbe_free_receive_ring(struct rx_ring *rxr)
4025 {
4026         struct ixgbe_rx_buf       *rxbuf;
4027         int i;
4028
4029         for (i = 0; i < rxr->num_desc; i++) {
4030                 rxbuf = &rxr->rx_buffers[i];
4031                 if (rxbuf->buf != NULL) {
4032                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4033                             BUS_DMASYNC_POSTREAD);
4034                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4035                         rxbuf->buf->m_flags |= M_PKTHDR;
4036                         m_freem(rxbuf->buf);
4037                         rxbuf->buf = NULL;
4038                         rxbuf->flags = 0;
4039                 }
4040         }
4041 }
4042
4043
4044 /*********************************************************************
4045  *
4046  *  Initialize a receive ring and its buffers.
4047  *
4048  **********************************************************************/
4049 static int
4050 ixgbe_setup_receive_ring(struct rx_ring *rxr)
4051 {
4052         struct  adapter         *adapter;
4053         struct ifnet            *ifp;
4054         device_t                dev;
4055         struct ixgbe_rx_buf     *rxbuf;
4056         bus_dma_segment_t       seg[1];
4057         struct lro_ctrl         *lro = &rxr->lro;
4058         int                     rsize, nsegs, error = 0;
4059 #ifdef DEV_NETMAP
4060         struct netmap_adapter *na = NA(rxr->adapter->ifp);
4061         struct netmap_slot *slot;
4062 #endif /* DEV_NETMAP */
4063
4064         adapter = rxr->adapter;
4065         ifp = adapter->ifp;
4066         dev = adapter->dev;
4067
4068         /* Clear the ring contents */
4069         IXGBE_RX_LOCK(rxr);
4070 #ifdef DEV_NETMAP
4071         /* same as in ixgbe_setup_transmit_ring() */
4072         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4073 #endif /* DEV_NETMAP */
4074         rsize = roundup2(adapter->num_rx_desc *
4075             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4076         bzero((void *)rxr->rx_base, rsize);
4077         /* Cache the size */
4078         rxr->mbuf_sz = adapter->rx_mbuf_sz;
4079
4080         /* Free current RX buffer structs and their mbufs */
4081         ixgbe_free_receive_ring(rxr);
4082
4083         /* Now replenish the mbufs */
4084         for (int j = 0; j != rxr->num_desc; ++j) {
4085                 struct mbuf     *mp;
4086
4087                 rxbuf = &rxr->rx_buffers[j];
4088 #ifdef DEV_NETMAP
4089                 /*
4090                  * In netmap mode, fill the map and set the buffer
4091                  * address in the NIC ring, considering the offset
4092                  * between the netmap and NIC rings (see comment in
4093                  * ixgbe_setup_transmit_ring() ). No need to allocate
4094                  * an mbuf, so end the block with a continue;
4095                  */
4096                 if (slot) {
4097                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4098                         uint64_t paddr;
4099                         void *addr;
4100
4101                         addr = PNMB(slot + sj, &paddr);
4102                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4103                         /* Update descriptor and the cached value */
4104                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4105                         rxbuf->addr = htole64(paddr);
4106                         continue;
4107                 }
4108 #endif /* DEV_NETMAP */
4109                 rxbuf->flags = 0; 
4110                 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
4111                     M_PKTHDR, adapter->rx_mbuf_sz);
4112                 if (rxbuf->buf == NULL) {
4113                         error = ENOBUFS;
4114                         goto fail;
4115                 }
4116                 mp = rxbuf->buf;
4117                 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4118                 /* Get the memory mapping */
4119                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4120                     rxbuf->pmap, mp, seg,
4121                     &nsegs, BUS_DMA_NOWAIT);
4122                 if (error != 0)
4123                         goto fail;
4124                 bus_dmamap_sync(rxr->ptag,
4125                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4126                 /* Update the descriptor and the cached value */
4127                 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
4128                 rxbuf->addr = htole64(seg[0].ds_addr);
4129         }
4130
4131
4132         /* Setup our descriptor indices */
4133         rxr->next_to_check = 0;
4134         rxr->next_to_refresh = 0;
4135         rxr->lro_enabled = FALSE;
4136         rxr->rx_copies = 0;
4137         rxr->rx_bytes = 0;
4138         rxr->discard = FALSE;
4139         rxr->vtag_strip = FALSE;
4140
4141         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4142             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4143
4144         /*
4145         ** Now set up the LRO interface:
4146         */
4147         if (ixgbe_rsc_enable)
4148                 ixgbe_setup_hw_rsc(rxr);
4149         else if (ifp->if_capenable & IFCAP_LRO) {
4150                 int err = tcp_lro_init(lro);
4151                 if (err) {
4152                         device_printf(dev, "LRO Initialization failed!\n");
4153                         goto fail;
4154                 }
4155                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4156                 rxr->lro_enabled = TRUE;
4157                 lro->ifp = adapter->ifp;
4158         }
4159
4160         IXGBE_RX_UNLOCK(rxr);
4161         return (0);
4162
4163 fail:
4164         ixgbe_free_receive_ring(rxr);
4165         IXGBE_RX_UNLOCK(rxr);
4166         return (error);
4167 }
4168
4169 /*********************************************************************
4170  *
4171  *  Initialize all receive rings.
4172  *
4173  **********************************************************************/
4174 static int
4175 ixgbe_setup_receive_structures(struct adapter *adapter)
4176 {
4177         struct rx_ring *rxr = adapter->rx_rings;
4178         int j;
4179
4180         for (j = 0; j < adapter->num_queues; j++, rxr++)
4181                 if (ixgbe_setup_receive_ring(rxr))
4182                         goto fail;
4183
4184         return (0);
4185 fail:
4186         /*
4187          * Free RX buffers allocated so far; we only handle the
4188          * rings that completed, since the failing one cleaned up
4189          * after itself. 'j' failed, so it's the terminus.
4190          */
4191         for (int i = 0; i < j; ++i) {
4192                 rxr = &adapter->rx_rings[i];
4193                 ixgbe_free_receive_ring(rxr);
4194         }
4195
4196         return (ENOBUFS);
4197 }
4198
4199 static void
4200 ixgbe_initialise_rss_mapping(struct adapter *adapter)
4201 {
4202         struct ixgbe_hw *hw = &adapter->hw;
4203         uint32_t reta;
4204         int i, j, queue_id;
4205         uint32_t rss_key[10];
4206         uint32_t mrqc;
4207
4208         /* Setup RSS */
4209         reta = 0;
4210
4211 #ifdef  RSS
4212         /* Fetch the configured RSS key */
4213         rss_getkey((uint8_t *) &rss_key);
4214 #else
4215         /* set up random bits */
4216         arc4rand(&rss_key, sizeof(rss_key), 0);
4217 #endif
4218
4219         /* Set up the redirection table */
4220         for (i = 0, j = 0; i < 128; i++, j++) {
4221                 if (j == adapter->num_queues) j = 0;
4222 #ifdef  RSS
4223                 /*
4224                  * Fetch the RSS bucket id for the given indirection entry.
4225                  * Cap it at the number of configured buckets (which is
4226                  * num_queues.)
4227                  */
4228                 queue_id = rss_get_indirection_to_bucket(i);
4229                 queue_id = queue_id % adapter->num_queues;
4230 #else
4231                 queue_id = (j * 0x11);
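                     /*
                      * Assumption (editorial): (j * 0x11) mirrors the
                      * queue index into both nibbles of the 8-bit entry,
                      * so a 4-bit RETA field selects the same queue
                      * whichever nibble the hardware consumes.
                      */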
4232 #endif
4233                 /*
4234                  * The low 8 bits are for hash value (n+0);
4235                  * The next 8 bits are for hash value (n+1), etc.
4236                  */
4237                 reta = reta >> 8;
4238                 reta = reta | ( ((uint32_t) queue_id) << 24);
4239                 if ((i & 3) == 3) {
4240                         IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4241                         reta = 0;
4242                 }
4243         }
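                /*
                 * Added commentary (not in the original source): each RETA
                 * register holds four 8-bit entries packed LSB-first, so with
                 * four queues and the non-RSS path above the first write is
                 *
                 *   RETA(0) = (0x33 << 24) | (0x22 << 16) | (0x11 << 8) | 0x00
                 *
                 * The 0x11 multiplier replicates the 4-bit queue number into
                 * both nibbles of the byte, presumably so the entry is valid
                 * whichever nibble a given MAC consumes.
                 */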
4244
4245         /* Now fill our hash function seeds */
4246         for (int i = 0; i < 10; i++)
4247                 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
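        /*
         * Added note: the RSS hash key is 40 bytes, programmed as ten
         * 32-bit RSSRK registers; rss_key[10] above is sized to match.
         */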
4248
4249         /* Perform hash on these packet types */
4250         mrqc = IXGBE_MRQC_RSSEN
4251              | IXGBE_MRQC_RSS_FIELD_IPV4
4252              | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4253              | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4254              | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4255              | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4256              | IXGBE_MRQC_RSS_FIELD_IPV6
4257              | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4258              | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4259              | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4260         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4261 }
4262
4263
4264 /*********************************************************************
4265  *
4266  *  Setup receive registers and features.
4267  *
4268  **********************************************************************/
4269 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4270
4271 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4272         
4273 static void
4274 ixgbe_initialize_receive_units(struct adapter *adapter)
4275 {
4276         struct  rx_ring *rxr = adapter->rx_rings;
4277         struct ixgbe_hw *hw = &adapter->hw;
4278         struct ifnet   *ifp = adapter->ifp;
4279         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4280         u32             hlreg;
4281
4282
4283         /*
4284          * Make sure receives are disabled while
4285          * setting up the descriptor ring
4286          */
4287         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4288         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4289             rxctrl & ~IXGBE_RXCTRL_RXEN);
4290
4291         /* Enable broadcasts */
4292         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4293         fctrl |= IXGBE_FCTRL_BAM;
4294         fctrl |= IXGBE_FCTRL_DPF;
4295         fctrl |= IXGBE_FCTRL_PMCF;
4296         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4297
4298         /* Set for Jumbo Frames? */
4299         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4300         if (ifp->if_mtu > ETHERMTU)
4301                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4302         else
4303                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4304 #ifdef DEV_NETMAP
4305         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4306         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4307                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4308         else
4309                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4310 #endif /* DEV_NETMAP */
4311         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4312
4313         bufsz = (adapter->rx_mbuf_sz +
4314             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
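        /*
         * Added note: SRRCTL expresses the packet buffer size in 1 KB
         * units (IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10 in this driver's
         * headers), and BSIZEPKT_ROUNDUP rounds rx_mbuf_sz up to the next
         * unit: a 2048-byte cluster gives (2048 + 1023) >> 10 = 2, i.e.
         * 2 KB hardware buffers.
         */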
4315
4316         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4317                 u64 rdba = rxr->rxdma.dma_paddr;
4318
4319                 /* Setup the Base and Length of the Rx Descriptor Ring */
4320                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4321                                (rdba & 0x00000000ffffffffULL));
4322                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4323                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4324                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4325
4326                 /* Set up the SRRCTL register */
4327                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4328                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4329                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4330                 srrctl |= bufsz;
4331                 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4332                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4333
4334                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4335                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4336                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4337
4338                 /* Set the processing limit */
4339                 rxr->process_limit = ixgbe_rx_process_limit;
4340         }
4341
4342         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4343                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4344                               IXGBE_PSRTYPE_UDPHDR |
4345                               IXGBE_PSRTYPE_IPV4HDR |
4346                               IXGBE_PSRTYPE_IPV6HDR;
4347                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4348         }
4349
4350         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4351
4352         ixgbe_initialise_rss_mapping(adapter);
4353
4354         if (adapter->num_queues > 1) {
4355                 /* RSS and RX IPP Checksum are mutually exclusive */
4356                 rxcsum |= IXGBE_RXCSUM_PCSD;
4357         }
4358
4359         if (ifp->if_capenable & IFCAP_RXCSUM)
4360                 rxcsum |= IXGBE_RXCSUM_PCSD;
4361
4362         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4363                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
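        /*
         * Added note: as I read the datasheet, PCSD makes the hardware
         * report the RSS hash in the descriptor field otherwise used for
         * the fragment checksum, which is why RSS and the IP payload
         * checksum (IPPCSE) are mutually exclusive.
         */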
4364
4365         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4366
4367         return;
4368 }
4369
4370 /*********************************************************************
4371  *
4372  *  Free all receive rings.
4373  *
4374  **********************************************************************/
4375 static void
4376 ixgbe_free_receive_structures(struct adapter *adapter)
4377 {
4378         struct rx_ring *rxr = adapter->rx_rings;
4379
4380         INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4381
4382         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4383                 struct lro_ctrl         *lro = &rxr->lro;
4384                 ixgbe_free_receive_buffers(rxr);
4385                 /* Free LRO memory */
4386                 tcp_lro_free(lro);
4387                 /* Free the ring memory as well */
4388                 ixgbe_dma_free(adapter, &rxr->rxdma);
4389         }
4390
4391         free(adapter->rx_rings, M_DEVBUF);
4392 }
4393
4394
4395 /*********************************************************************
4396  *
4397  *  Free receive ring data structures
4398  *
4399  **********************************************************************/
4400 static void
4401 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4402 {
4403         struct adapter          *adapter = rxr->adapter;
4404         struct ixgbe_rx_buf     *rxbuf;
4405
4406         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4407
4408         /* Cleanup any existing buffers */
4409         if (rxr->rx_buffers != NULL) {
4410                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4411                         rxbuf = &rxr->rx_buffers[i];
4412                         if (rxbuf->buf != NULL) {
4413                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4414                                     BUS_DMASYNC_POSTREAD);
4415                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4416                                 rxbuf->buf->m_flags |= M_PKTHDR;
4417                                 m_freem(rxbuf->buf);
4418                         }
4419                         rxbuf->buf = NULL;
4420                         if (rxbuf->pmap != NULL) {
4421                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4422                                 rxbuf->pmap = NULL;
4423                         }
4424                 }
4425                 if (rxr->rx_buffers != NULL) {
4426                         free(rxr->rx_buffers, M_DEVBUF);
4427                         rxr->rx_buffers = NULL;
4428                 }
4429         }
4430
4431         if (rxr->ptag != NULL) {
4432                 bus_dma_tag_destroy(rxr->ptag);
4433                 rxr->ptag = NULL;
4434         }
4435
4436         return;
4437 }
4438
4439 static __inline void
4440 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4441 {
4442                  
4443         /*
4444          * At the moment LRO is only for IP/TCP packets whose TCP checksum
4445          * was computed by hardware, and with no VLAN tag in the Ethernet
4446          * header.  For IPv6 we do not yet support extension headers.
4447          */
4448         if (rxr->lro_enabled &&
4449             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4450             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4451             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4452             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4453             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4454             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4455             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4456             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4457                 /*
4458                  * Send to the stack if:
4459                  *  - LRO not enabled, or
4460                  *  - no LRO resources, or
4461                  *  - lro enqueue fails
4462                  */
4463                 if (rxr->lro.lro_cnt != 0)
4464                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4465                                 return;
4466         }
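        /*
         * Added commentary: the ring lock is dropped around if_input()
         * below because the stack may do arbitrary work with the packet
         * (including looping it back into transmit), and holding a
         * driver lock across that invites lock-order reversals.
         */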
4467         IXGBE_RX_UNLOCK(rxr);
4468         (*ifp->if_input)(ifp, m);
4469         IXGBE_RX_LOCK(rxr);
4470 }
4471
4472 static __inline void
4473 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4474 {
4475         struct ixgbe_rx_buf     *rbuf;
4476
4477         rbuf = &rxr->rx_buffers[i];
4478
4479         if (rbuf->fmp != NULL) {/* Partial chain ? */
4480                 rbuf->fmp->m_flags |= M_PKTHDR;
4481                 m_freem(rbuf->fmp);
4482                 rbuf->fmp = NULL;
4483         }
4484
4485         /*
4486         ** With advanced descriptors the writeback
4487         ** clobbers the buffer addrs, so it's easier
4488         ** to just free the existing mbufs and take
4489         ** the normal refresh path to get new buffers
4490         ** and mapping.
4491         */
4492         if (rbuf->buf) {
4493                 m_free(rbuf->buf);
4494                 rbuf->buf = NULL;
4495         }
4496
4497         rbuf->flags = 0;
4498  
4499         return;
4500 }
4501
4502
4503 /*********************************************************************
4504  *
4505  *  This routine executes in interrupt context. It replenishes
4506  *  the mbufs in the descriptor and sends data which has been
4507  *  the mbufs in the descriptor ring and sends data which has been
4508  *  DMA'd into host memory up to the upper layer.
4509  *  We loop at most count times if count is > 0, or until done if
4510  *  We loop at most 'count' times (the ring's process limit), or
4511  *  until the ring is clean, whichever comes first.
4512  *  Return TRUE for more work, FALSE for all clean.
4513  *********************************************************************/
4514 static bool
4515 ixgbe_rxeof(struct ix_queue *que)
4516 {
4517         struct adapter          *adapter = que->adapter;
4518         struct rx_ring          *rxr = que->rxr;
4519         struct ifnet            *ifp = adapter->ifp;
4520         struct lro_ctrl         *lro = &rxr->lro;
4521         struct lro_entry        *queued;
4522         int                     i, nextp, processed = 0;
4523         u32                     staterr = 0;
4524         u16                     count = rxr->process_limit;
4525         union ixgbe_adv_rx_desc *cur;
4526         struct ixgbe_rx_buf     *rbuf, *nbuf;
4527         u16                     pkt_info;
4528
4529         IXGBE_RX_LOCK(rxr);
4530
4531 #ifdef DEV_NETMAP
4532         /* Same as the txeof routine: wakeup clients on intr. */
4533         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4534                 IXGBE_RX_UNLOCK(rxr);
4535                 return (FALSE);
4536         }
4537 #endif /* DEV_NETMAP */
4538
4539         for (i = rxr->next_to_check; count != 0;) {
4540                 struct mbuf     *sendmp, *mp;
4541                 u32             rsc, ptype;
4542                 u16             len;
4543                 u16             vtag = 0;
4544                 bool            eop;
4545  
4546                 /* Sync the ring. */
4547                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4548                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4549
4550                 cur = &rxr->rx_base[i];
4551                 staterr = le32toh(cur->wb.upper.status_error);
4552                 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
4553
4554                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4555                         break;
4556                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4557                         break;
4558
4559                 count--;
4560                 sendmp = NULL;
4561                 nbuf = NULL;
4562                 rsc = 0;
4563                 cur->wb.upper.status_error = 0;
4564                 rbuf = &rxr->rx_buffers[i];
4565                 mp = rbuf->buf;
4566
4567                 len = le16toh(cur->wb.upper.length);
4568                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4569                     IXGBE_RXDADV_PKTTYPE_MASK;
4570                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4571
4572                 /* Make sure bad packets are discarded */
4573                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4574                     (rxr->discard)) {
4575                         rxr->rx_discarded++;
4576                         if (eop)
4577                                 rxr->discard = FALSE;
4578                         else
4579                                 rxr->discard = TRUE;
4580                         ixgbe_rx_discard(rxr, i);
4581                         goto next_desc;
4582                 }
4583
4584                 /*
4585                 ** On 82599 which supports a hardware
4586                 ** LRO (called HW RSC), a packet's fragments
4587                 ** need not occupy sequential descriptors;
4588                 ** rather, the next descriptor's index
4589                 ** is carried in bits of this descriptor.
4590                 ** This also means that we might process
4591                 ** more than one packet at a time, something
4592                 ** that was never true before; it
4593                 ** required eliminating global chain pointers
4594                 ** in favor of what we are doing here.  -jfv
4595                 */
4596                 if (!eop) {
4597                         /*
4598                         ** Figure out the next descriptor
4599                         ** of this frame.
4600                         */
4601                         if (rxr->hw_rsc == TRUE) {
4602                                 rsc = ixgbe_rsc_count(cur);
4603                                 rxr->rsc_num += (rsc - 1);
4604                         }
4605                         if (rsc) { /* Get hardware index */
4606                                 nextp = ((staterr &
4607                                     IXGBE_RXDADV_NEXTP_MASK) >>
4608                                     IXGBE_RXDADV_NEXTP_SHIFT);
4609                         } else { /* Just sequential */
4610                                 nextp = i + 1;
4611                                 if (nextp == adapter->num_rx_desc)
4612                                         nextp = 0;
4613                         }
4614                         nbuf = &rxr->rx_buffers[nextp];
4615                         prefetch(nbuf);
4616                 }
4617                 /*
4618                 ** Rather than using the fmp/lmp global pointers
4619                 ** we now keep the head of a packet chain in the
4620                 ** buffer struct and pass this along from one
4621                 ** descriptor to the next, until we get EOP.
4622                 */
4623                 mp->m_len = len;
4624                 /*
4625                 ** See if there is a stored head
4626                 ** that determines what we are
4627                 */
4628                 sendmp = rbuf->fmp;
4629                 if (sendmp != NULL) {  /* secondary frag */
4630                         rbuf->buf = rbuf->fmp = NULL;
4631                         mp->m_flags &= ~M_PKTHDR;
4632                         sendmp->m_pkthdr.len += mp->m_len;
4633                 } else {
4634                         /*
4635                          * Optimize.  This might be a small packet,
4636                          * maybe just a TCP ACK.  Do a fast copy that
4637                          * is cache aligned into a new mbuf, and
4638                          * leave the old mbuf+cluster for re-use.
4639                          */
4640                         if (eop && len <= IXGBE_RX_COPY_LEN) {
4641                                 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4642                                 if (sendmp != NULL) {
4643                                         sendmp->m_data +=
4644                                             IXGBE_RX_COPY_ALIGN;
4645                                         ixgbe_bcopy(mp->m_data,
4646                                             sendmp->m_data, len);
4647                                         sendmp->m_len = len;
4648                                         rxr->rx_copies++;
4649                                         rbuf->flags |= IXGBE_RX_COPY;
4650                                 }
4651                         }
4652                         if (sendmp == NULL) {
4653                                 rbuf->buf = rbuf->fmp = NULL;
4654                                 sendmp = mp;
4655                         }
4656
4657                         /* first desc of a non-ps chain */
4658                         sendmp->m_flags |= M_PKTHDR;
4659                         sendmp->m_pkthdr.len = mp->m_len;
4660                 }
4661                 ++processed;
4662
4663                 /* Pass the head pointer on */
4664                 if (eop == 0) {
4665                         nbuf->fmp = sendmp;
4666                         sendmp = NULL;
4667                         mp->m_next = nbuf->buf;
4668                 } else { /* Sending this frame */
4669                         sendmp->m_pkthdr.rcvif = ifp;
4670                         rxr->rx_packets++;
4671                         /* capture data for AIM */
4672                         rxr->bytes += sendmp->m_pkthdr.len;
4673                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4674                         /* Process vlan info */
4675                         if ((rxr->vtag_strip) &&
4676                             (staterr & IXGBE_RXD_STAT_VP))
4677                                 vtag = le16toh(cur->wb.upper.vlan);
4678                         if (vtag) {
4679                                 sendmp->m_pkthdr.ether_vtag = vtag;
4680                                 sendmp->m_flags |= M_VLANTAG;
4681                         }
4682                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4683                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4684 #if __FreeBSD_version >= 800000
4685 #ifdef RSS
4686                         sendmp->m_pkthdr.flowid =
4687                             le32toh(cur->wb.lower.hi_dword.rss);
4688                         sendmp->m_flags |= M_FLOWID;
4689                         switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
4690                         case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
4691                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
4692                                 break;
4693                         case IXGBE_RXDADV_RSSTYPE_IPV4:
4694                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
4695                                 break;
4696                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
4697                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
4698                                 break;
4699                         case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
4700                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
4701                                 break;
4702                         case IXGBE_RXDADV_RSSTYPE_IPV6:
4703                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
4704                                 break;
4705                         case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
4706                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
4707                                 break;
4708                         /* XXX no UDP support in RSS just yet */
4709 #ifdef  notyet
4710                         case IGXBE_RXDADV_RSSTYPE_IPV4_UDP:
4711                         case IGXBE_RXDADV_RSSTYPE_IPV6_UDP:
4712                         case IGXBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
4713 #endif /* notyet */
4714                         default:
4715                                 /* XXX fallthrough */
4716                                 M_HASHTYPE_SET(sendmp, M_HASHTYPE_NONE);
4717                         }
4718 #else /* RSS */
4719                         sendmp->m_pkthdr.flowid = que->msix;
4720                         sendmp->m_flags |= M_FLOWID;
4721 #endif /* RSS */
4722 #endif /* FreeBSD_version */
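                        /*
                         * Added note: M_FLOWID advertises that
                         * m_pkthdr.flowid holds a valid value, letting
                         * the stack reuse the hardware-computed hash
                         * for its own queue selection.
                         */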
4723                 }
4724 next_desc:
4725                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4726                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4727
4728                 /* Advance our pointers to the next descriptor. */
4729                 if (++i == rxr->num_desc)
4730                         i = 0;
4731
4732                 /* Now send to the stack or do LRO */
4733                 if (sendmp != NULL) {
4734                         rxr->next_to_check = i;
4735                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4736                         i = rxr->next_to_check;
4737                 }
4738
4739                 /* Every 8 descriptors we go to refresh mbufs */
4740                 if (processed == 8) {
4741                         ixgbe_refresh_mbufs(rxr, i);
4742                         processed = 0;
4743                 }
4744         }
4745
4746         /* Refresh any remaining buf structs */
4747         if (ixgbe_rx_unrefreshed(rxr))
4748                 ixgbe_refresh_mbufs(rxr, i);
4749
4750         rxr->next_to_check = i;
4751
4752         /*
4753          * Flush any outstanding LRO work
4754          */
4755         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4756                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4757                 tcp_lro_flush(lro, queued);
4758         }
4759
4760         IXGBE_RX_UNLOCK(rxr);
4761
4762         /*
4763         ** Still have cleaning to do?
4764         */
4765         if ((staterr & IXGBE_RXD_STAT_DD) != 0)
4766                 return (TRUE);
4767         else
4768                 return (FALSE);
4769 }
4770
4771
4772 /*********************************************************************
4773  *
4774  *  Verify that the hardware indicated that the checksum is valid.
4775  *  Inform the stack about the status of checksum so that stack
4776  *  Inform the stack about the status of the checksum so that the
4777  *  stack doesn't spend time verifying it again.
4778  *********************************************************************/
4779 static void
4780 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4781 {
4782         u16     status = (u16) staterr;
4783         u8      errors = (u8) (staterr >> 24);
4784         bool    sctp = FALSE;
4785
4786         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4787             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4788                 sctp = TRUE;
4789
4790         if (status & IXGBE_RXD_STAT_IPCS) {
4791                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4792                         /* IP Checksum Good */
4793                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4794                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4795
4796                 } else
4797                         mp->m_pkthdr.csum_flags = 0;
4798         }
4799         if (status & IXGBE_RXD_STAT_L4CS) {
4800                 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4801 #if __FreeBSD_version >= 800000
4802                 if (sctp)
4803                         type = CSUM_SCTP_VALID;
4804 #endif
4805                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4806                         mp->m_pkthdr.csum_flags |= type;
4807                         if (!sctp)
4808                                 mp->m_pkthdr.csum_data = htons(0xffff);
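                        /*
                         * Added note: 0xffff is the conventional
                         * "already verified" value for csum_data when
                         * CSUM_PSEUDO_HDR is set, so TCP/UDP input
                         * skips its own checksum pass.
                         */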
4809                 } 
4810         }
4811         return;
4812 }
4813
4814
4815 /*
4816 ** This routine is run via a vlan config EVENT;
4817 ** it enables us to use the HW Filter table since
4818 ** we can get the vlan id. This just creates the
4819 ** entry in the soft version of the VFTA; init will
4820 ** repopulate the real table.
4821 */
4822 static void
4823 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4824 {
4825         struct adapter  *adapter = ifp->if_softc;
4826         u16             index, bit;
4827
4828         if (ifp->if_softc !=  arg)   /* Not our event */
4829                 return;
4830
4831         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4832                 return;
4833
4834         IXGBE_CORE_LOCK(adapter);
4835         index = (vtag >> 5) & 0x7F;
4836         bit = vtag & 0x1F;
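        /*
         * Added example: the 4096 possible VLAN IDs map onto 128 32-bit
         * shadow words; vtag 1234, say, lands in word 38 (1234 >> 5) at
         * bit 18 (1234 & 0x1F).
         */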
4837         adapter->shadow_vfta[index] |= (1 << bit);
4838         ++adapter->num_vlans;
4839         ixgbe_setup_vlan_hw_support(adapter);
4840         IXGBE_CORE_UNLOCK(adapter);
4841 }
4842
4843 /*
4844 ** This routine is run via a vlan
4845 ** unconfig EVENT; it removes our entry
4846 ** from the soft VFTA.
4847 */
4848 static void
4849 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4850 {
4851         struct adapter  *adapter = ifp->if_softc;
4852         u16             index, bit;
4853
4854         if (ifp->if_softc !=  arg)
4855                 return;
4856
4857         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4858                 return;
4859
4860         IXGBE_CORE_LOCK(adapter);
4861         index = (vtag >> 5) & 0x7F;
4862         bit = vtag & 0x1F;
4863         adapter->shadow_vfta[index] &= ~(1 << bit);
4864         --adapter->num_vlans;
4865         /* Re-init to load the changes */
4866         ixgbe_setup_vlan_hw_support(adapter);
4867         IXGBE_CORE_UNLOCK(adapter);
4868 }
4869
4870 static void
4871 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4872 {
4873         struct ifnet    *ifp = adapter->ifp;
4874         struct ixgbe_hw *hw = &adapter->hw;
4875         struct rx_ring  *rxr;
4876         u32             ctrl;
4877
4878
4879         /*
4880         ** We get here through init_locked, meaning
4881         ** a soft reset; this has already cleared
4882         ** the VFTA and other state, so if no
4883         ** vlans have been registered, do nothing.
4884         */
4885         if (adapter->num_vlans == 0)
4886                 return;
4887
4888         /* Setup the queues for vlans */
4889         for (int i = 0; i < adapter->num_queues; i++) {
4890                 rxr = &adapter->rx_rings[i];
4891                 /* On 82599 the VLAN enable is per/queue in RXDCTL */
4892                 if (hw->mac.type != ixgbe_mac_82598EB) {
4893                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4894                         ctrl |= IXGBE_RXDCTL_VME;
4895                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4896                 }
4897                 rxr->vtag_strip = TRUE;
4898         }
4899
4900         if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
4901                 return;
4902         /*
4903         ** A soft reset zeroes out the VFTA, so
4904         ** we need to repopulate it now.
4905         */
4906         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4907                 if (adapter->shadow_vfta[i] != 0)
4908                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4909                             adapter->shadow_vfta[i]);
4910
4911         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4912         /* Enable the filter table if the capability is enabled */
4913         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4914                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4915                 ctrl |= IXGBE_VLNCTRL_VFE;
4916         }
4917         if (hw->mac.type == ixgbe_mac_82598EB)
4918                 ctrl |= IXGBE_VLNCTRL_VME;
4919         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4920 }
4921
4922 static void
4923 ixgbe_enable_intr(struct adapter *adapter)
4924 {
4925         struct ixgbe_hw *hw = &adapter->hw;
4926         struct ix_queue *que = adapter->queues;
4927         u32             mask, fwsm;
4928
4929         mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4930         /* Enable Fan Failure detection */
4931         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4932                     mask |= IXGBE_EIMS_GPI_SDP1;
4933
4934         switch (adapter->hw.mac.type) {
4935                 case ixgbe_mac_82599EB:
4936                         mask |= IXGBE_EIMS_ECC;
4937                         mask |= IXGBE_EIMS_GPI_SDP0;
4938                         mask |= IXGBE_EIMS_GPI_SDP1;
4939                         mask |= IXGBE_EIMS_GPI_SDP2;
4940 #ifdef IXGBE_FDIR
4941                         mask |= IXGBE_EIMS_FLOW_DIR;
4942 #endif
4943                         break;
4944                 case ixgbe_mac_X540:
4945                         mask |= IXGBE_EIMS_ECC;
4946                         /* Detect if Thermal Sensor is enabled */
4947                         fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
4948                         if (fwsm & IXGBE_FWSM_TS_ENABLED)
4949                                 mask |= IXGBE_EIMS_TS;
4950 #ifdef IXGBE_FDIR
4951                         mask |= IXGBE_EIMS_FLOW_DIR;
4952 #endif
4953                 /* falls through */
4954                 default:
4955                         break;
4956         }
4957
4958         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4959
4960         /* With RSS we use auto clear */
4961         if (adapter->msix_mem) {
4962                 mask = IXGBE_EIMS_ENABLE_MASK;
4963                 /* Don't autoclear Link */
4964                 mask &= ~IXGBE_EIMS_OTHER;
4965                 mask &= ~IXGBE_EIMS_LSC;
4966                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4967         }
4968
4969         /*
4970         ** Now enable all queues, this is done separately to
4971         ** allow for handling the extended (beyond 32) MSIX
4972         ** vectors that can be used by 82599
4973         */
4974         for (int i = 0; i < adapter->num_queues; i++, que++)
4975                 ixgbe_enable_queue(adapter, que->msix);
4976
4977         IXGBE_WRITE_FLUSH(hw);
4978
4979         return;
4980 }
4981
4982 static void
4983 ixgbe_disable_intr(struct adapter *adapter)
4984 {
4985         if (adapter->msix_mem)
4986                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4987         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4988                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4989         } else {
4990                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
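                /*
                 * Added note: on these MACs the low 16 EIMC bits alias
                 * queue causes that EIMC_EX below masks in full, so (as
                 * I understand the layout) only the upper, miscellaneous
                 * causes need masking here.
                 */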
4991                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4992                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4993         }
4994         IXGBE_WRITE_FLUSH(&adapter->hw);
4995         return;
4996 }
4997
4998 u16
4999 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
5000 {
5001         u16 value;
5002
5003         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
5004             reg, 2);
5005
5006         return (value);
5007 }
5008
5009 void
5010 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
5011 {
5012         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
5013             reg, value, 2);
5014
5015         return;
5016 }
5017
5018 /*
5019 ** Get the width and transaction speed of
5020 ** the slot this adapter is plugged into.
5021 */
5022 static void
5023 ixgbe_get_slot_info(struct ixgbe_hw *hw)
5024 {
5025         device_t                dev = ((struct ixgbe_osdep *)hw->back)->dev;
5026         struct ixgbe_mac_info   *mac = &hw->mac;
5027         u16                     link;
5028         u32                     offset;
5029
5030         /* For most devices simply call the shared code routine */
5031         if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
5032                 ixgbe_get_bus_info(hw);
5033                 goto display;
5034         }
5035
5036         /*
5037         ** For the Quad port adapter we need to parse back
5038         ** up the PCI tree to find the speed of the expansion
5039         ** slot into which this adapter is plugged. A bit more work.
5040         */
5041         dev = device_get_parent(device_get_parent(dev));
5042 #ifdef IXGBE_DEBUG
5043         device_printf(dev, "parent pcib = %x,%x,%x\n",
5044             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
5045 #endif
5046         dev = device_get_parent(device_get_parent(dev));
5047 #ifdef IXGBE_DEBUG
5048         device_printf(dev, "slot pcib = %x,%x,%x\n",
5049             pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
5050 #endif
5051         /* Now get the PCI Express Capabilities offset */
5052         pci_find_cap(dev, PCIY_EXPRESS, &offset);
5053         /* ...and read the Link Status Register */
5054         link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
5055         switch (link & IXGBE_PCI_LINK_WIDTH) {
5056         case IXGBE_PCI_LINK_WIDTH_1:
5057                 hw->bus.width = ixgbe_bus_width_pcie_x1;
5058                 break;
5059         case IXGBE_PCI_LINK_WIDTH_2:
5060                 hw->bus.width = ixgbe_bus_width_pcie_x2;
5061                 break;
5062         case IXGBE_PCI_LINK_WIDTH_4:
5063                 hw->bus.width = ixgbe_bus_width_pcie_x4;
5064                 break;
5065         case IXGBE_PCI_LINK_WIDTH_8:
5066                 hw->bus.width = ixgbe_bus_width_pcie_x8;
5067                 break;
5068         default:
5069                 hw->bus.width = ixgbe_bus_width_unknown;
5070                 break;
5071         }
5072
5073         switch (link & IXGBE_PCI_LINK_SPEED) {
5074         case IXGBE_PCI_LINK_SPEED_2500:
5075                 hw->bus.speed = ixgbe_bus_speed_2500;
5076                 break;
5077         case IXGBE_PCI_LINK_SPEED_5000:
5078                 hw->bus.speed = ixgbe_bus_speed_5000;
5079                 break;
5080         case IXGBE_PCI_LINK_SPEED_8000:
5081                 hw->bus.speed = ixgbe_bus_speed_8000;
5082                 break;
5083         default:
5084                 hw->bus.speed = ixgbe_bus_speed_unknown;
5085                 break;
5086         }
5087
5088         mac->ops.set_lan_id(hw);
5089
5090 display:
5091         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
5092             ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
5093             (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
5094             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
5095             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
5096             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
5097             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
5098             ("Unknown"));
5099
5100         if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
5101             ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
5102             (hw->bus.speed == ixgbe_bus_speed_2500))) {
5103                 device_printf(dev, "PCI-Express bandwidth available"
5104                     " for this card\n     is not sufficient for"
5105                     " optimal performance.\n");
5106                 device_printf(dev, "For optimal performance a x8 "
5107                     "PCIE, or x4 PCIE Gen2 slot is required.\n");
5108         }
5109         if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
5110             ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
5111             (hw->bus.speed < ixgbe_bus_speed_8000))) {
5112                 device_printf(dev, "PCI-Express bandwidth available"
5113                     " for this card\n     is not sufficient for"
5114                     " optimal performance.\n");
5115                 device_printf(dev, "For optimal performance a x8 "
5116                     "PCIE Gen3 slot is required.\n");
5117         }
5118
5119         return;
5120 }
5121
5122
5123 /*
5124 ** Setup the correct IVAR register for a particular MSIX interrupt
5125 **   (yes this is all very magic and confusing :)
5126 **  - entry is the register array entry
5127 **  - vector is the MSIX vector for this queue
5128 **  - type is RX/TX/MISC
5129 */
5130 static void
5131 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
5132 {
5133         struct ixgbe_hw *hw = &adapter->hw;
5134         u32 ivar, index;
5135
5136         vector |= IXGBE_IVAR_ALLOC_VAL;
5137
5138         switch (hw->mac.type) {
5139
5140         case ixgbe_mac_82598EB:
5141                 if (type == -1)
5142                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
5143                 else
5144                         entry += (type * 64);
5145                 index = (entry >> 2) & 0x1F;
5146                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5147                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5148                 ivar |= (vector << (8 * (entry & 0x3)));
5149                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5150                 break;
5151
5152         case ixgbe_mac_82599EB:
5153         case ixgbe_mac_X540:
5154                 if (type == -1) { /* MISC IVAR */
5155                         index = (entry & 1) * 8;
5156                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5157                         ivar &= ~(0xFF << index);
5158                         ivar |= (vector << index);
5159                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5160                 } else {        /* RX/TX IVARS */
5161                         index = (16 * (entry & 1)) + (8 * type);
5162                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5163                         ivar &= ~(0xFF << index);
5164                         ivar |= (vector << index);
5165                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5166                 }
                break;
5167
5168         default:
5169                 break;
5170         }
5171 }
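/*
 * Added commentary on the IVAR layout, as reconstructed from the code
 * above: on 82598 each 32-bit IVAR register packs four 8-bit entries,
 * with TX causes living 64 entries above their RX counterparts; on
 * 82599/X540 each register instead holds the RX and TX entries for a
 * pair of queues, which is what the (entry & 1) half-select and the
 * (8 * type) offset compute.
 */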
5172
5173 static void
5174 ixgbe_configure_ivars(struct adapter *adapter)
5175 {
5176         struct  ix_queue *que = adapter->queues;
5177         u32 newitr;
5178
5179         if (ixgbe_max_interrupt_rate > 0)
5180                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5181         else
5182                 newitr = 0;
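        /*
         * Added note: EITR's interval field sits in bits [11:3] in 2 us
         * units, so (4000000 / rate) & 0x0FF8 encodes a period of
         * 1000000/rate microseconds; a 31250 ints/s cap, for example,
         * yields 0x80, i.e. a 32 us interval.
         */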
5183
5184         for (int i = 0; i < adapter->num_queues; i++, que++) {
5185                 /* First the RX queue entry */
5186                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5187                 /* ... and the TX */
5188                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5189                 /* Set an Initial EITR value */
5190                 IXGBE_WRITE_REG(&adapter->hw,
5191                     IXGBE_EITR(que->msix), newitr);
5192         }
5193
5194         /* For the Link interrupt */
5195         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5196 }
5197
5198 /*
5199 ** ixgbe_sfp_probe - called in the local timer to
5200 ** determine if a port had optics inserted.
5201 */  
5202 static bool ixgbe_sfp_probe(struct adapter *adapter)
5203 {
5204         struct ixgbe_hw *hw = &adapter->hw;
5205         device_t        dev = adapter->dev;
5206         bool            result = FALSE;
5207
5208         if ((hw->phy.type == ixgbe_phy_nl) &&
5209             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5210                 s32 ret = hw->phy.ops.identify_sfp(hw);
5211                 if (ret)
5212                         goto out;
5213                 ret = hw->phy.ops.reset(hw);
5214                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5215                         device_printf(dev,"Unsupported SFP+ module detected!");
5216                         printf(" Reload driver with supported module.\n");
5217                         adapter->sfp_probe = FALSE;
5218                         goto out;
5219                 } else
5220                         device_printf(dev,"SFP+ module detected!\n");
5221                 /* We now have supported optics */
5222                 adapter->sfp_probe = FALSE;
5223                 /* Set the optics type so system reports correctly */
5224                 ixgbe_setup_optics(adapter);
5225                 result = TRUE;
5226         }
5227 out:
5228         return (result);
5229 }
5230
5231 /*
5232 ** Tasklet handler for MSIX Link interrupts
5233 **  - do outside interrupt since it might sleep
5234 */
5235 static void
5236 ixgbe_handle_link(void *context, int pending)
5237 {
5238         struct adapter  *adapter = context;
5239
5240         ixgbe_check_link(&adapter->hw,
5241             &adapter->link_speed, &adapter->link_up, 0);
5242         ixgbe_update_link_status(adapter);
5243 }
5244
5245 /*
5246 ** Tasklet for handling SFP module interrupts
5247 */
5248 static void
5249 ixgbe_handle_mod(void *context, int pending)
5250 {
5251         struct adapter  *adapter = context;
5252         struct ixgbe_hw *hw = &adapter->hw;
5253         device_t        dev = adapter->dev;
5254         u32 err;
5255
5256         err = hw->phy.ops.identify_sfp(hw);
5257         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5258                 device_printf(dev,
5259                     "Unsupported SFP+ module type was detected.\n");
5260                 return;
5261         }
5262         err = hw->mac.ops.setup_sfp(hw);
5263         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5264                 device_printf(dev,
5265                     "Setup failure - unsupported SFP+ module type.\n");
5266                 return;
5267         }
5268         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5269         return;
5270 }
5271
5272
5273 /*
5274 ** Tasklet for handling MSF (multispeed fiber) interrupts
5275 */
5276 static void
5277 ixgbe_handle_msf(void *context, int pending)
5278 {
5279         struct adapter  *adapter = context;
5280         struct ixgbe_hw *hw = &adapter->hw;
5281         u32 autoneg;
5282         bool negotiate;
5283
5284         autoneg = hw->phy.autoneg_advertised;
5285         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5286                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5287         if (hw->mac.ops.setup_link)
5288                 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5289         return;
5290 }
5291
5292 #ifdef IXGBE_FDIR
5293 /*
5294 ** Tasklet for reinitializing the Flow Director filter table
5295 */
5296 static void
5297 ixgbe_reinit_fdir(void *context, int pending)
5298 {
5299         struct adapter  *adapter = context;
5300         struct ifnet   *ifp = adapter->ifp;
5301
5302         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5303                 return;
5304         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5305         adapter->fdir_reinit = 0;
5306         /* re-enable flow director interrupts */
5307         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5308         /* Restart the interface */
5309         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5310         return;
5311 }
5312 #endif
5313
5314 /**********************************************************************
5315  *
5316  *  Update the board statistics counters.
5317  *
5318  **********************************************************************/
5319 static void
5320 ixgbe_update_stats_counters(struct adapter *adapter)
5321 {
5322         struct ifnet   *ifp = adapter->ifp;
5323         struct ixgbe_hw *hw = &adapter->hw;
5324         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5325         u64  total_missed_rx = 0;
5326
5327         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5328         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5329         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5330         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5331
5332         /*
5333         ** Note: these are for the 8 possible traffic classes,
5334         **       which the current implementation leaves unused,
5335         **       so only class 0 should read real data.
5336         */
5337         for (int i = 0; i < 8; i++) {
5338                 u32 mp;
5339                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5340                 /* missed_rx tallies misses for the gprc workaround */
5341                 missed_rx += mp;
5342                 /* global total per queue */
5343                 adapter->stats.mpc[i] += mp;
5344                 /* Running comprehensive total for stats display */
5345                 total_missed_rx += adapter->stats.mpc[i];
5346                 if (hw->mac.type == ixgbe_mac_82598EB) {
5347                         adapter->stats.rnbc[i] +=
5348                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5349                         adapter->stats.qbtc[i] +=
5350                             IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5351                         adapter->stats.qbrc[i] +=
5352                             IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5353                         adapter->stats.pxonrxc[i] +=
5354                             IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5355                 } else
5356                         adapter->stats.pxonrxc[i] +=
5357                             IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5358                 adapter->stats.pxontxc[i] +=
5359                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5360                 adapter->stats.pxofftxc[i] +=
5361                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5362                 adapter->stats.pxoffrxc[i] +=
5363                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5364                 adapter->stats.pxon2offc[i] +=
5365                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5366         }
5367         for (int i = 0; i < 16; i++) {
5368                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5369                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5370                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5371         }
5372         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5373         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5374         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5375
5376         /* Hardware workaround, gprc counts missed packets */
5377         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5378         adapter->stats.gprc -= missed_rx;
5379
5380         if (hw->mac.type != ixgbe_mac_82598EB) {
5381                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5382                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5383                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5384                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5385                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5386                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5387                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5388                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5389         } else {
5390                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5391                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5392                 /* 82598 only has a counter in the high register */
5393                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5394                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5395                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5396         }
5397
5398         /*
5399          * Workaround: mprc hardware is incorrectly counting
5400          * broadcasts, so for now we subtract those.
5401          */
5402         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5403         adapter->stats.bprc += bprc;
5404         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5405         if (hw->mac.type == ixgbe_mac_82598EB)
5406                 adapter->stats.mprc -= bprc;
5407
5408         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5409         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5410         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5411         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5412         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5413         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5414
5415         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5416         adapter->stats.lxontxc += lxon;
5417         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5418         adapter->stats.lxofftxc += lxoff;
5419         total = lxon + lxoff;
5420
5421         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5422         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5423         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5424         adapter->stats.gptc -= total;
5425         adapter->stats.mptc -= total;
5426         adapter->stats.ptc64 -= total;
5427         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5428
5429         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5430         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5431         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5432         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5433         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5434         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5435         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5436         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5437         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5438         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5439         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5440         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5441         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5442         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5443         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5444         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5445         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5446         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5447         /* Only read FCOE on 82599 */
5448         if (hw->mac.type != ixgbe_mac_82598EB) {
5449                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5450                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5451                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5452                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5453                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5454         }
5455
5456         /* Fill out the OS statistics structure */
5457         ifp->if_ipackets = adapter->stats.gprc;
5458         ifp->if_opackets = adapter->stats.gptc;
5459         ifp->if_ibytes = adapter->stats.gorc;
5460         ifp->if_obytes = adapter->stats.gotc;
5461         ifp->if_imcasts = adapter->stats.mprc;
5462         ifp->if_omcasts = adapter->stats.mptc;
5463         ifp->if_collisions = 0;
5464
5465         /* Rx Errors */
5466         ifp->if_iqdrops = total_missed_rx;
5467         ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
5468 }
5469
5470 /** ixgbe_sysctl_tdh_handler - Handler function
5471  *  Retrieves the TDH value from the hardware
5472  */
5473 static int 
5474 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5475 {
5476         int error;
5477
5478         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5479         if (!txr) return 0;
5480
5481         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5482         error = sysctl_handle_int(oidp, &val, 0, req);
5483         if (error || !req->newptr)
5484                 return error;
5485         return 0;
5486 }
5487
5488 /** ixgbe_sysctl_tdt_handler - Handler function
5489  *  Retrieves the TDT value from the hardware
5490  */
5491 static int 
5492 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5493 {
5494         int error;
5495
5496         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5497         if (!txr) return 0;
5498
5499         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5500         error = sysctl_handle_int(oidp, &val, 0, req);
5501         if (error || !req->newptr)
5502                 return error;
5503         return 0;
5504 }
5505
5506 /** ixgbe_sysctl_rdh_handler - Handler function
5507  *  Retrieves the RDH value from the hardware
5508  */
5509 static int 
5510 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5511 {
5512         int error;
5513
5514         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5515         if (!rxr) return 0;
5516
5517         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5518         error = sysctl_handle_int(oidp, &val, 0, req);
5519         if (error || !req->newptr)
5520                 return error;
5521         return 0;
5522 }
5523
5524 /** ixgbe_sysctl_rdt_handler - Handler function
5525  *  Retrieves the RDT value from the hardware
5526  */
5527 static int 
5528 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5529 {
5530         int error;
5531
5532         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5533         if (!rxr) return 0;
5534
5535         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5536         error = sysctl_handle_int(oidp, &val, 0, req);
5537         if (error || !req->newptr)
5538                 return error;
5539         return 0;
5540 }
5541
5542 static int
5543 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5544 {
5545         int error;
5546         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5547         unsigned int reg, usec, rate;
5548
5549         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5550         usec = ((reg & 0x0FF8) >> 3);
5551         if (usec > 0)
5552                 rate = 500000 / usec;
5553         else
5554                 rate = 0;
5555         error = sysctl_handle_int(oidp, &rate, 0, req);
5556         if (error || !req->newptr)
5557                 return error;
5558         reg &= ~0xfff; /* default, no limitation */
5559         ixgbe_max_interrupt_rate = 0;
5560         if (rate > 0 && rate < 500000) {
5561                 if (rate < 1000)
5562                         rate = 1000;
5563                 ixgbe_max_interrupt_rate = rate;
5564                 reg |= ((4000000/rate) & 0xff8 );
5565         }
5566         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5567         return 0;
5568 }
5569
5570 /*
5571  * Add sysctl variables, one per statistic, to the system.
5572  */
static void
ixgbe_add_hw_stats(struct adapter *adapter)
{
        device_t dev = adapter->dev;

        struct tx_ring *txr = adapter->tx_rings;
        struct rx_ring *rxr = adapter->rx_rings;

        struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
        struct sysctl_oid *tree = device_get_sysctl_tree(dev);
        struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
        struct ixgbe_hw_stats *stats = &adapter->stats;

        struct sysctl_oid *stat_node, *queue_node;
        struct sysctl_oid_list *stat_list, *queue_list;

#define QUEUE_NAME_LEN 32
        char namebuf[QUEUE_NAME_LEN];

        /* Driver Statistics */
        SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
                        CTLFLAG_RD, &adapter->dropped_pkts,
                        "Driver dropped packets");
        SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
                        CTLFLAG_RD, &adapter->mbuf_defrag_failed,
                        "m_defrag() failed");
        SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
                        CTLFLAG_RD, &adapter->watchdog_events,
                        "Watchdog timeouts");
        SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
                        CTLFLAG_RD, &adapter->link_irq,
                        "Link MSIX IRQ Handled");

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
                queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
                                            CTLFLAG_RD, NULL, "Queue Name");
                queue_list = SYSCTL_CHILDREN(queue_node);

                SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
                                CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
                                sizeof(&adapter->queues[i]),
                                ixgbe_sysctl_interrupt_rate_handler, "IU",
                                "Interrupt Rate");
                SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
                                CTLFLAG_RD, &(adapter->queues[i].irqs),
                                "irqs on this queue");
                SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
                                CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
                                ixgbe_sysctl_tdh_handler, "IU",
                                "Transmit Descriptor Head");
                SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
                                CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
                                ixgbe_sysctl_tdt_handler, "IU",
                                "Transmit Descriptor Tail");
                SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
                                CTLFLAG_RD, &txr->tso_tx,
                                "TSO");
                SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
                                CTLFLAG_RD, &txr->no_tx_dma_setup,
                                "Driver tx dma failure in xmit");
                SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
                                CTLFLAG_RD, &txr->no_desc_avail,
                                "Queue No Descriptor Available");
                SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
                                CTLFLAG_RD, &txr->total_packets,
                                "Queue Packets Transmitted");
        }

        for (int i = 0; i < adapter->num_queues; i++, rxr++) {
                struct lro_ctrl *lro = &rxr->lro;

                snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
                queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
                                            CTLFLAG_RD, NULL, "Queue Name");
                queue_list = SYSCTL_CHILDREN(queue_node);

                SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
                                CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
                                ixgbe_sysctl_rdh_handler, "IU",
                                "Receive Descriptor Head");
                SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
                                CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
                                ixgbe_sysctl_rdt_handler, "IU",
                                "Receive Descriptor Tail");
                SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
                                CTLFLAG_RD, &rxr->rx_packets,
                                "Queue Packets Received");
                SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
                                CTLFLAG_RD, &rxr->rx_bytes,
                                "Queue Bytes Received");
                SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
                                CTLFLAG_RD, &rxr->rx_copies,
                                "Copied RX Frames");
                SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
                                CTLFLAG_RD, &lro->lro_queued, 0,
                                "LRO Queued");
                SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
                                CTLFLAG_RD, &lro->lro_flushed, 0,
                                "LRO Flushed");
        }

        /* MAC stats get their own sub node */

        stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
                                    CTLFLAG_RD, NULL, "MAC Statistics");
        stat_list = SYSCTL_CHILDREN(stat_node);

        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
                        CTLFLAG_RD, &stats->crcerrs,
                        "CRC Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
                        CTLFLAG_RD, &stats->illerrc,
                        "Illegal Byte Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
                        CTLFLAG_RD, &stats->errbc,
                        "Byte Errors");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
                        CTLFLAG_RD, &stats->mspdc,
                        "MAC Short Packets Discarded");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
                        CTLFLAG_RD, &stats->mlfc,
                        "MAC Local Faults");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
                        CTLFLAG_RD, &stats->mrfc,
                        "MAC Remote Faults");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
                        CTLFLAG_RD, &stats->rlec,
                        "Receive Length Errors");

        /* Flow Control stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
                        CTLFLAG_RD, &stats->lxontxc,
                        "Link XON Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
                        CTLFLAG_RD, &stats->lxonrxc,
                        "Link XON Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
                        CTLFLAG_RD, &stats->lxofftxc,
                        "Link XOFF Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
                        CTLFLAG_RD, &stats->lxoffrxc,
                        "Link XOFF Received");

        /* Packet Reception Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
                        CTLFLAG_RD, &stats->tor,
                        "Total Octets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
                        CTLFLAG_RD, &stats->gorc,
                        "Good Octets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
                        CTLFLAG_RD, &stats->tpr,
                        "Total Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
                        CTLFLAG_RD, &stats->gprc,
                        "Good Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
                        CTLFLAG_RD, &stats->mprc,
                        "Multicast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
                        CTLFLAG_RD, &stats->bprc,
                        "Broadcast Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
                        CTLFLAG_RD, &stats->prc64,
                        "64 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
                        CTLFLAG_RD, &stats->prc127,
                        "65-127 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
                        CTLFLAG_RD, &stats->prc255,
                        "128-255 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
                        CTLFLAG_RD, &stats->prc511,
                        "256-511 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
                        CTLFLAG_RD, &stats->prc1023,
                        "512-1023 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
                        CTLFLAG_RD, &stats->prc1522,
                        "1024-1522 byte frames received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
                        CTLFLAG_RD, &stats->ruc,
                        "Receive Undersized");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
                        CTLFLAG_RD, &stats->rfc,
                        "Fragmented Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
                        CTLFLAG_RD, &stats->roc,
                        "Oversized Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
                        CTLFLAG_RD, &stats->rjc,
                        "Received Jabber");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
                        CTLFLAG_RD, &stats->mngprc,
                        "Management Packets Received");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
                        CTLFLAG_RD, &stats->mngpdc,
                        "Management Packets Dropped");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
                        CTLFLAG_RD, &stats->xec,
                        "Checksum Errors");

        /* Packet Transmission Stats */
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
                        CTLFLAG_RD, &stats->gotc,
                        "Good Octets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
                        CTLFLAG_RD, &stats->tpt,
                        "Total Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
                        CTLFLAG_RD, &stats->gptc,
                        "Good Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
                        CTLFLAG_RD, &stats->bptc,
                        "Broadcast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
                        CTLFLAG_RD, &stats->mptc,
                        "Multicast Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
                        CTLFLAG_RD, &stats->mngptc,
                        "Management Packets Transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
                        CTLFLAG_RD, &stats->ptc64,
                        "64 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
                        CTLFLAG_RD, &stats->ptc127,
                        "65-127 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
                        CTLFLAG_RD, &stats->ptc255,
                        "128-255 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
                        CTLFLAG_RD, &stats->ptc511,
                        "256-511 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
                        CTLFLAG_RD, &stats->ptc1023,
                        "512-1023 byte frames transmitted");
        SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
                        CTLFLAG_RD, &stats->ptc1522,
                        "1024-1522 byte frames transmitted");
}

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
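/*
** Example (editor's sketch): assuming this handler is attached as
** "fc" under the device's sysctl tree elsewhere in the driver, full
** flow control is requested with:
**
**      sysctl dev.ix.0.fc=3
*/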
static int
ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
        int error, last;
        struct adapter *adapter = (struct adapter *) arg1;

        last = adapter->fc;
        error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
        if ((error) || (req->newptr == NULL))
                return (error);

        /* Don't bother if it's not changed */
        if (adapter->fc == last)
                return (0);

        switch (adapter->fc) {
                case ixgbe_fc_rx_pause:
                case ixgbe_fc_tx_pause:
                case ixgbe_fc_full:
                        adapter->hw.fc.requested_mode = adapter->fc;
                        if (adapter->num_queues > 1)
                                ixgbe_disable_rx_drop(adapter);
                        break;
                case ixgbe_fc_none:
                        adapter->hw.fc.requested_mode = ixgbe_fc_none;
                        if (adapter->num_queues > 1)
                                ixgbe_enable_rx_drop(adapter);
                        break;
                default:
                        adapter->fc = last;
                        return (EINVAL);
        }
        /* Don't autoneg if forcing a value */
        adapter->hw.fc.disable_fc_autoneg = TRUE;
        ixgbe_fc_enable(&adapter->hw);
        return (error);
}

/*
** Control link advertised speed:
**      1 - advertise only 1G
**      2 - advertise 100Mb (X540 only)
**      3 - advertise normal (1G and 10G)
*/
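/*
** Example (editor's sketch): assuming this handler is attached as
** "advertise_speed" under the device's sysctl tree elsewhere in the
** driver, 1G-only advertising is requested with:
**
**      sysctl dev.ix.0.advertise_speed=1
*/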
static int
ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
{
        int                     error = 0;
        struct adapter          *adapter;
        device_t                dev;
        struct ixgbe_hw         *hw;
        ixgbe_link_speed        speed, last;

        adapter = (struct adapter *) arg1;
        dev = adapter->dev;
        hw = &adapter->hw;
        last = adapter->advertise;

        error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
        if ((error) || (req->newptr == NULL))
                return (error);

        if (adapter->advertise == last) /* no change */
                return (0);

        if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
            (hw->phy.multispeed_fiber)))
                return (EINVAL);

        if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
                device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
                return (EINVAL);
        }

        if (adapter->advertise == 1)
                speed = IXGBE_LINK_SPEED_1GB_FULL;
        else if (adapter->advertise == 2)
                speed = IXGBE_LINK_SPEED_100_FULL;
        else if (adapter->advertise == 3)
                speed = IXGBE_LINK_SPEED_1GB_FULL |
                        IXGBE_LINK_SPEED_10GB_FULL;
        else {  /* bogus value */
                adapter->advertise = last;
                return (EINVAL);
        }

        hw->mac.autotry_restart = TRUE;
        hw->mac.ops.setup_link(hw, speed, TRUE);

        return (error);
}

/*
** Thermal Shutdown Trigger
**   - cause a Thermal Overtemp IRQ
**   - now requires the feature to be enabled in firmware
*/
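/*
** Example (editor's sketch): the node name here is an assumption,
** since the sysctl registration is not in this part of the file.  If
** the handler is attached as, say, "thermal_test", any nonzero write
** fires the test interrupt on X540:
**
**      sysctl dev.ix.0.thermal_test=1
*/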
static int
ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
{
        int             error, fire = 0;
        struct adapter  *adapter = (struct adapter *) arg1;
        struct ixgbe_hw *hw = &adapter->hw;

        if (hw->mac.type != ixgbe_mac_X540)
                return (0);

        error = sysctl_handle_int(oidp, &fire, 0, req);
        if ((error) || (req->newptr == NULL))
                return (error);

        if (fire) {
                u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
                reg |= IXGBE_EICR_TS;
                IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
        }

        return (0);
}

/*
** Enable the hardware to drop packets when the buffer is
** full.  This is useful when multiple queues are in use, so that
** no single full queue stalls the entire RX engine.  We only
** enable this when multiqueue is in use AND flow control is
** disabled; ixgbe_set_flowcntl() above toggles it accordingly.
*/
static void
ixgbe_enable_rx_drop(struct adapter *adapter)
{
        struct ixgbe_hw *hw = &adapter->hw;

        for (int i = 0; i < adapter->num_queues; i++) {
                u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
                srrctl |= IXGBE_SRRCTL_DROP_EN;
                IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
        }
}

static void
ixgbe_disable_rx_drop(struct adapter *adapter)
{
        struct ixgbe_hw *hw = &adapter->hw;

        for (int i = 0; i < adapter->num_queues; i++) {
                u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
                srrctl &= ~IXGBE_SRRCTL_DROP_EN;
                IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
        }
}