/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(if_t, struct mbuf *);
static int      em_mq_start_locked(if_t,
                    struct tx_ring *);
static void     em_qflush(if_t);
#else
static void     em_start(if_t);
static void     em_start_locked(if_t, struct tx_ring *);
#endif
static int      em_ioctl(if_t, u_long, caddr_t);
static uint64_t em_get_counter(if_t, ift_counter);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(if_t, struct ifmediareq *);
static int      em_media_change(if_t);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);
static void     em_flush_desc_rings(struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_setup_rxdesc(union e1000_rx_desc_extended *,
                    const struct em_rxbuffer *rxbuf);
static void     em_receive_checksum(uint32_t status, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, if_t, u16);
static void     em_unregister_vlan(void *, if_t, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void     em_enable_vectors_82574(struct adapter *);
#endif

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
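/*
 * The interrupt-delay timers count in 1.024 usec units, hence the
 * 1024/1000 scaling above (rounded to nearest).  Worked example, derived
 * from the macros themselves:
 * EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks, and
 * EM_TICKS_TO_USECS(98)  = (1024 * 98 + 500) / 1000  = 100 usecs again.
 */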
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
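/*
 * The ITR register counts in 256 ns increments, so the default above
 * works out to 1e9 / (8000 * 256) = 488 units, i.e. an interval of about
 * 488 * 256 ns ~= 125 usecs, or roughly 8000 interrupts per second.
 */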

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

#define TSO_WORKAROUND  4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last CPU used when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** each time a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
** Energy Efficient Ethernet - default to OFF.  The value is copied into
** hw->dev_spec.ich8lan.eee_disable, so nonzero disables EEE.
*/
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }
        /*
        ** In the new SPT device the flash is not a separate BAR; rather,
        ** it is also in BAR0, so use the same tag and an offset handle
        ** for the FLASH read/write macros in the shared code.
        */
        else if (hw->mac.type == e1000_pch_spt) {
                adapter->osdep.flash_bus_space_tag =
                    adapter->osdep.mem_bus_space_tag;
                adapter->osdep.flash_bus_space_handle =
                    adapter->osdep.mem_bus_space_handle
                    + E1000_FLASH_BASE_ADDR;
        }

        /* Do Shared Code initialization */
        error = e1000_setup_init_funcs(hw, TRUE);
        if (error) {
                device_printf(dev, "Setup of Shared code failed, error %d\n",
                    error);
                error = ENXIO;
                goto err_pci;
        }

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  They
         * must not exceed the hardware maximum, and the ring size in
         * bytes must be a multiple of EM_DBA_ALIGN.
         */
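        /*
         * For example, assuming the usual EM_DEFAULT_TXD of 1024
         * descriptors at 16 bytes each (sizeof(struct e1000_tx_desc)),
         * the ring occupies 16384 bytes, a multiple of the 128-byte
         * EM_DBA_ALIGN.
         */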
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
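        /*
         * With the standard 1500-byte MTU this works out to
         * 1500 + 14 + 4 = 1518 bytes, the classic maximum Ethernet
         * frame length (ETHER_MAX_LEN).
         */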

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is important for reading
        ** the NVM and MAC address reliably.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to the link
                ** being in a sleep state; call it again, and if it fails
                ** a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);

        /*
        ** Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != (void *)NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        if_t ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (if_vlantrunkinuse(ifp)) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (if_getcapenable(ifp) & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        if_t ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((if_getflags(ifp) & IFF_UP) &&
            (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!if_sendq_empty(ifp))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!if_sendq_empty(ifp)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
                        break;
                }
                m_head = if_dequeue(ifp);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        if_sendq_prepend(ifp, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

        }

        return;
}

static void
em_start(if_t ifp)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;

        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than doing an immediate send.  That ability to defer work,
 *  rather than simply having multiple tx queues, is the advantage
 *  in this driver.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;
        unsigned int    i, error;

        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        error = drbr_enqueue(ifp, txr->br, m);
        if (error)
                return (error);

        if (EM_TX_TRYLOCK(txr)) {
                em_mq_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(txr->tq, &txr->tx_task);

        return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        EM_TX_LOCK_ASSERT(txr);

        if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
                if (next->m_flags & M_MCAST)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        if_setflagbits(ifp, IFF_UP, 0);
                        if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(if_getflags(ifp) & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_pch_spt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
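                /*
                 * The largest acceptable MTU excludes the Ethernet header
                 * (14 bytes) and CRC (4 bytes); e.g. a 9234-byte maximum
                 * frame allows an MTU of up to 9234 - 14 - 4 = 9216 bytes.
                 */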
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                if_setmtu(ifp, ifr->ifr_mtu);
                adapter->hw.mac.max_frame_size =
                    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
                if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
                        em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (if_getflags(ifp) & IFF_UP) {
                        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                                if ((if_getflags(ifp) ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
                                        em_stop(adapter);
                adapter->if_flags = if_getflags(ifp);
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                if_setcapenablebit(ifp, IFCAP_POLLING, 0);
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                if_setcapenablebit(ifp, 0, IFCAP_POLLING);
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        if_togglecapenable(ifp, IFCAP_HWCSUM);
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        if_togglecapenable(ifp, IFCAP_TSO4);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                if_togglecapenable(ifp, IFCAP_WOL_MCAST);
                        if (mask & IFCAP_WOL_MAGIC)
                                if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
                }
                if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
                        em_init(adapter);
                if_vlancap(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as the
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *********************************************************************/
1356
1357 static void
1358 em_init_locked(struct adapter *adapter)
1359 {
1360         if_t ifp = adapter->ifp;
1361         device_t        dev = adapter->dev;
1362
1363         INIT_DEBUGOUT("em_init: begin");
1364
1365         EM_CORE_LOCK_ASSERT(adapter);
1366
1367         em_disable_intr(adapter);
1368         callout_stop(&adapter->timer);
1369
1370         /* Get the latest MAC address; the user may have set a LAA. */
1371         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1372               ETHER_ADDR_LEN);
1373
1374         /* Put the address into the Receive Address Array */
1375         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1376
1377         /*
1378          * On the 82571, RAR[0] may be overwritten when the other
1379          * port is reset.  Keep a duplicate in the last RAR entry
1380          * (RAR[14]) so that the interface continues to function
1381          * should that happen.
1382          */
1383         if (adapter->hw.mac.type == e1000_82571) {
1384                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1385                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1386                     E1000_RAR_ENTRIES - 1);
1387         }
1388
1389         /* Initialize the hardware */
1390         em_reset(adapter);
1391         em_update_link_status(adapter);
1392
1393         /* Setup VLAN support, basic and offload if available */
1394         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
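         /* (VET is the VLAN Ether Type register; ETHERTYPE_VLAN is 0x8100.) */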
1395
1396         /* Set hardware offload abilities */
1397         if_clearhwassist(ifp);
1398         if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1399                 if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1400         /*
1401         ** TSO has proven problematic when not running at full
1402         ** gigabit speed, so disable the assist automatically
1403         ** when at lower speeds.  -jfv
1404         */
1405         if (if_getcapenable(ifp) & IFCAP_TSO4) {
1406                 if (adapter->link_speed == SPEED_1000)
1407                         if_sethwassistbits(ifp, CSUM_TSO, 0);
1408         }
1409
1410         /* Configure for OS presence */
1411         em_init_manageability(adapter);
1412
1413         /* Prepare transmit descriptors and buffers */
1414         em_setup_transmit_structures(adapter);
1415         em_initialize_transmit_unit(adapter);
1416
1417         /* Setup Multicast table */
1418         em_set_multi(adapter);
1419
1420         /*
1421         ** Figure out which mbuf cluster pool to draw from,
1422         ** based on the maximum frame size (jumbo support).
1423         */
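         /*
          * MCLBYTES is the standard 2K cluster, MJUMPAGESIZE a
          * page-sized (commonly 4K) jumbo cluster, and MJUM9BYTES
          * a 9K jumbo cluster.
          */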
1424         if (adapter->hw.mac.max_frame_size <= 2048)
1425                 adapter->rx_mbuf_sz = MCLBYTES;
1426         else if (adapter->hw.mac.max_frame_size <= 4096)
1427                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1428         else
1429                 adapter->rx_mbuf_sz = MJUM9BYTES;
1430
1431         /* Prepare receive descriptors and buffers */
1432         if (em_setup_receive_structures(adapter)) {
1433                 device_printf(dev, "Could not setup receive structures\n");
1434                 em_stop(adapter);
1435                 return;
1436         }
1437         em_initialize_receive_unit(adapter);
1438
1439         /* Use real VLAN Filter support? */
1440         if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1441                 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1442                         /* Use real VLAN Filter support */
1443                         em_setup_vlan_hw_support(adapter);
1444                 else {
1445                         u32 ctrl;
1446                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1447                         ctrl |= E1000_CTRL_VME;
1448                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1449                 }
1450         }
1451
1452         /* Don't lose promiscuous settings */
1453         em_set_promisc(adapter);
1454
1455         /* Set the interface as ACTIVE */
1456         if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1457
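         /* Kick off the once-per-second link/stats/watchdog timer. */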
1458         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1459         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1460
1461         /* MSI/X configuration for 82574 */
1462         if (adapter->hw.mac.type == e1000_82574) {
1463                 int tmp;
1464                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1465                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1466                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1467                 /* Set the IVAR - interrupt vector routing. */
1468                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1469         }
1470
1471 #ifdef DEVICE_POLLING
1472         /*
1473          * Only enable interrupts if we are not polling; make sure
1474          * they are off otherwise.
1475          */
1476         if (if_getcapenable(ifp) & IFCAP_POLLING)
1477                 em_disable_intr(adapter);
1478         else
1479 #endif /* DEVICE_POLLING */
1480                 em_enable_intr(adapter);
1481
1482         /* AMT based hardware can now take control from firmware */
1483         if (adapter->has_manage && adapter->has_amt)
1484                 em_get_hw_control(adapter);
1485 }
1486
1487 static void
1488 em_init(void *arg)
1489 {
1490         struct adapter *adapter = arg;
1491
1492         EM_CORE_LOCK(adapter);
1493         em_init_locked(adapter);
1494         EM_CORE_UNLOCK(adapter);
1495 }
1496
1497
1498 #ifdef DEVICE_POLLING
1499 /*********************************************************************
1500  *
1501  *  Legacy polling routine: note that this only works with a single queue
1502  *
1503  *********************************************************************/
1504 static int
1505 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1506 {
1507         struct adapter *adapter = if_getsoftc(ifp);
1508         struct tx_ring  *txr = adapter->tx_rings;
1509         struct rx_ring  *rxr = adapter->rx_rings;
1510         u32             reg_icr;
1511         int             rx_done;
1512
1513         EM_CORE_LOCK(adapter);
1514         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1515                 EM_CORE_UNLOCK(adapter);
1516                 return (0);
1517         }
1518
1519         if (cmd == POLL_AND_CHECK_STATUS) {
1520                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1521                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1522                         callout_stop(&adapter->timer);
1523                         adapter->hw.mac.get_link_status = 1;
1524                         em_update_link_status(adapter);
1525                         callout_reset(&adapter->timer, hz,
1526                             em_local_timer, adapter);
1527                 }
1528         }
1529         EM_CORE_UNLOCK(adapter);
1530
1531         em_rxeof(rxr, count, &rx_done);
1532
1533         EM_TX_LOCK(txr);
1534         em_txeof(txr);
1535 #ifdef EM_MULTIQUEUE
1536         if (!drbr_empty(ifp, txr->br))
1537                 em_mq_start_locked(ifp, txr);
1538 #else
1539         if (!if_sendq_empty(ifp))
1540                 em_start_locked(ifp, txr);
1541 #endif
1542         EM_TX_UNLOCK(txr);
1543
1544         return (rx_done);
1545 }
1546 #endif /* DEVICE_POLLING */
1547
1548
1549 /*********************************************************************
1550  *
1551  *  Fast Legacy/MSI Combined Interrupt Service routine  
1552  *
1553  *********************************************************************/
1554 static int
1555 em_irq_fast(void *arg)
1556 {
1557         struct adapter  *adapter = arg;
1558         if_t ifp;
1559         u32             reg_icr;
1560
1561         ifp = adapter->ifp;
1562
1563         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1564
1565         /* Hot eject?  */
1566         if (reg_icr == 0xffffffff)
1567                 return (FILTER_STRAY);
1568
1569         /* Definitely not our interrupt.  */
1570         if (reg_icr == 0x0)
1571                 return (FILTER_STRAY);
1572
1573         /*
1574          * Starting with the 82571 chip, bit 31 should be used to
1575          * determine whether the interrupt belongs to us.
1576          */
1577         if (adapter->hw.mac.type >= e1000_82571 &&
1578             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1579                 return FILTER_STRAY;
1580
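         /*
          * Mask interrupts and defer the work to the taskqueue;
          * em_handle_que() re-enables interrupts when done.
          */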
1581         em_disable_intr(adapter);
1582         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1583
1584         /* Link status change */
1585         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1586                 adapter->hw.mac.get_link_status = 1;
1587                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1588         }
1589
1590         if (reg_icr & E1000_ICR_RXO)
1591                 adapter->rx_overruns++;
1592         return (FILTER_HANDLED);
1593 }
1594
1595 /* Combined RX/TX handler, used by Legacy and MSI */
1596 static void
1597 em_handle_que(void *context, int pending)
1598 {
1599         struct adapter  *adapter = context;
1600         if_t ifp = adapter->ifp;
1601         struct tx_ring  *txr = adapter->tx_rings;
1602         struct rx_ring  *rxr = adapter->rx_rings;
1603
1604         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1605                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1606
1607                 EM_TX_LOCK(txr);
1608                 em_txeof(txr);
1609 #ifdef EM_MULTIQUEUE
1610                 if (!drbr_empty(ifp, txr->br))
1611                         em_mq_start_locked(ifp, txr);
1612 #else
1613                 if (!if_sendq_empty(ifp))
1614                         em_start_locked(ifp, txr);
1615 #endif
1616                 EM_TX_UNLOCK(txr);
1617                 if (more) {
1618                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1619                         return;
1620                 }
1621         }
1622
1623         em_enable_intr(adapter);
1624         return;
1625 }
1626
1627
1628 /*********************************************************************
1629  *
1630  *  MSIX Interrupt Service Routines
1631  *
1632  **********************************************************************/
1633 static void
1634 em_msix_tx(void *arg)
1635 {
1636         struct tx_ring *txr = arg;
1637         struct adapter *adapter = txr->adapter;
1638         if_t ifp = adapter->ifp;
1639
1640         ++txr->tx_irq;
1641         EM_TX_LOCK(txr);
1642         em_txeof(txr);
1643 #ifdef EM_MULTIQUEUE
1644         if (!drbr_empty(ifp, txr->br))
1645                 em_mq_start_locked(ifp, txr);
1646 #else
1647         if (!if_sendq_empty(ifp))
1648                 em_start_locked(ifp, txr);
1649 #endif
1650
1651         /* Reenable this interrupt */
1652         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1653         EM_TX_UNLOCK(txr);
1654         return;
1655 }
1656
1657 /*********************************************************************
1658  *
1659  *  MSIX RX Interrupt Service routine
1660  *
1661  **********************************************************************/
1662
1663 static void
1664 em_msix_rx(void *arg)
1665 {
1666         struct rx_ring  *rxr = arg;
1667         struct adapter  *adapter = rxr->adapter;
1668         bool            more;
1669
1670         ++rxr->rx_irq;
1671         if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1672                 return;
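         /*
          * Process up to rx_process_limit frames; if more remain,
          * reschedule the task and leave this vector masked until
          * the backlog drains.
          */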
1673         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1674         if (more)
1675                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1676         else {
1677                 /* Reenable this interrupt */
1678                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1679         }
1680         return;
1681 }
1682
1683 /*********************************************************************
1684  *
1685  *  MSIX Link Fast Interrupt Service routine
1686  *
1687  **********************************************************************/
1688 static void
1689 em_msix_link(void *arg)
1690 {
1691         struct adapter  *adapter = arg;
1692         u32             reg_icr;
1693
1694         ++adapter->link_irq;
1695         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1696
1697         if (reg_icr & E1000_ICR_RXO)
1698                 adapter->rx_overruns++;
1699
1700         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1701                 adapter->hw.mac.get_link_status = 1;
1702                 em_handle_link(adapter, 0);
1703         } else
1704                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1705                     EM_MSIX_LINK | E1000_IMS_LSC);
1706         /*
1707         ** Because we must read the ICR for this interrupt, the
1708         ** read may clear other pending causes via autoclear.  To
1709         ** avoid losing them, we simply re-raise a software
1710         ** interrupt (ICS) for all of our vectors.
1711         */
1712         if (reg_icr) {
1713                 E1000_WRITE_REG(&adapter->hw,
1714                         E1000_ICS, adapter->ims);
1715         }
1716         return;
1717 }
1718
1719 static void
1720 em_handle_rx(void *context, int pending)
1721 {
1722         struct rx_ring  *rxr = context;
1723         struct adapter  *adapter = rxr->adapter;
1724         bool            more;
1725
1726         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1727         if (more)
1728                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1729         else {
1730                 /* Reenable this interrupt */
1731                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1732         }
1733 }
1734
1735 static void
1736 em_handle_tx(void *context, int pending)
1737 {
1738         struct tx_ring  *txr = context;
1739         struct adapter  *adapter = txr->adapter;
1740         if_t ifp = adapter->ifp;
1741
1742         EM_TX_LOCK(txr);
1743         em_txeof(txr);
1744 #ifdef EM_MULTIQUEUE
1745         if (!drbr_empty(ifp, txr->br))
1746                 em_mq_start_locked(ifp, txr);
1747 #else
1748         if (!if_sendq_empty(ifp))
1749                 em_start_locked(ifp, txr);
1750 #endif
1751         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1752         EM_TX_UNLOCK(txr);
1753 }
1754
1755 static void
1756 em_handle_link(void *context, int pending)
1757 {
1758         struct adapter  *adapter = context;
1759         struct tx_ring  *txr = adapter->tx_rings;
1760         if_t ifp = adapter->ifp;
1761
1762         if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1763                 return;
1764
1765         EM_CORE_LOCK(adapter);
1766         callout_stop(&adapter->timer);
1767         em_update_link_status(adapter);
1768         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1769         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1770             EM_MSIX_LINK | E1000_IMS_LSC);
1771         if (adapter->link_active) {
1772                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1773                         EM_TX_LOCK(txr);
1774 #ifdef EM_MULTIQUEUE
1775                         if (!drbr_empty(ifp, txr->br))
1776                                 em_mq_start_locked(ifp, txr);
1777 #else
1778                         if (!if_sendq_empty(ifp))
1779                                 em_start_locked(ifp, txr);
1780 #endif
1781                         EM_TX_UNLOCK(txr);
1782                 }
1783         }
1784         EM_CORE_UNLOCK(adapter);
1785 }
1786
1787
1788 /*********************************************************************
1789  *
1790  *  Media Ioctl callback
1791  *
1792  *  This routine is called whenever the user queries the status of
1793  *  the interface using ifconfig.
1794  *
1795  **********************************************************************/
1796 static void
1797 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1798 {
1799         struct adapter *adapter = if_getsoftc(ifp);
1800         u_char fiber_type = IFM_1000_SX;
1801
1802         INIT_DEBUGOUT("em_media_status: begin");
1803
1804         EM_CORE_LOCK(adapter);
1805         em_update_link_status(adapter);
1806
1807         ifmr->ifm_status = IFM_AVALID;
1808         ifmr->ifm_active = IFM_ETHER;
1809
1810         if (!adapter->link_active) {
1811                 EM_CORE_UNLOCK(adapter);
1812                 return;
1813         }
1814
1815         ifmr->ifm_status |= IFM_ACTIVE;
1816
1817         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1818             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1819                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1820         } else {
1821                 switch (adapter->link_speed) {
1822                 case 10:
1823                         ifmr->ifm_active |= IFM_10_T;
1824                         break;
1825                 case 100:
1826                         ifmr->ifm_active |= IFM_100_TX;
1827                         break;
1828                 case 1000:
1829                         ifmr->ifm_active |= IFM_1000_T;
1830                         break;
1831                 }
1832                 if (adapter->link_duplex == FULL_DUPLEX)
1833                         ifmr->ifm_active |= IFM_FDX;
1834                 else
1835                         ifmr->ifm_active |= IFM_HDX;
1836         }
1837         EM_CORE_UNLOCK(adapter);
1838 }
1839
1840 /*********************************************************************
1841  *
1842  *  Media Ioctl callback
1843  *
1844  *  This routine is called when the user changes speed/duplex using
1845  *  the media/mediaopt options with ifconfig.
1846  *
1847  **********************************************************************/
1848 static int
1849 em_media_change(if_t ifp)
1850 {
1851         struct adapter *adapter = if_getsoftc(ifp);
1852         struct ifmedia  *ifm = &adapter->media;
1853
1854         INIT_DEBUGOUT("em_media_change: begin");
1855
1856         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1857                 return (EINVAL);
1858
1859         EM_CORE_LOCK(adapter);
1860         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1861         case IFM_AUTO:
1862                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1863                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1864                 break;
1865         case IFM_1000_LX:
1866         case IFM_1000_SX:
1867         case IFM_1000_T:
1868                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1869                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1870                 break;
1871         case IFM_100_TX:
1872                 adapter->hw.mac.autoneg = FALSE;
1873                 adapter->hw.phy.autoneg_advertised = 0;
1874                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1875                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1876                 else
1877                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1878                 break;
1879         case IFM_10_T:
1880                 adapter->hw.mac.autoneg = FALSE;
1881                 adapter->hw.phy.autoneg_advertised = 0;
1882                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1883                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1884                 else
1885                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1886                 break;
1887         default:
1888                 device_printf(adapter->dev, "Unsupported media type\n");
1889         }
1890
1891         em_init_locked(adapter);
1892         EM_CORE_UNLOCK(adapter);
1893
1894         return (0);
1895 }
1896
1897 /*********************************************************************
1898  *
1899  *  This routine maps the mbufs to tx descriptors.
1900  *
1901  *  return 0 on success, positive on failure
1902  **********************************************************************/
1903
1904 static int
1905 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1906 {
1907         struct adapter          *adapter = txr->adapter;
1908         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1909         bus_dmamap_t            map;
1910         struct em_txbuffer      *tx_buffer, *tx_buffer_mapped;
1911         struct e1000_tx_desc    *ctxd = NULL;
1912         struct mbuf             *m_head;
1913         struct ether_header     *eh;
1914         struct ip               *ip = NULL;
1915         struct tcphdr           *tp = NULL;
1916         u32                     txd_upper = 0, txd_lower = 0;
1917         int                     ip_off, poff;
1918         int                     nsegs, i, j, first, last = 0;
1919         int                     error;
1920         bool                    do_tso, tso_desc, remap = TRUE;
1921
1922         m_head = *m_headp;
1923         do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1924         tso_desc = FALSE;
1925         ip_off = poff = 0;
1926
1927         /*
1928          * Intel recommends entire IP/TCP header length reside in a single
1929          * buffer. If multiple descriptors are used to describe the IP and
1930          * TCP header, each descriptor should describe one or more
1931          * complete headers; descriptors referencing only parts of headers
1932          * are not supported. If all layer headers are not coalesced into
1933          * a single buffer, each buffer should not cross a 4KB boundary,
1934          * or be larger than the maximum read request size.
1935          * The controller also requires the IP/TCP header to be modified
1936          * for TSO to work, so we first obtain a writable mbuf chain and
1937          * then coalesce the ethernet/IP/TCP headers into a single buffer
1938          * to meet the controller's requirements.  This also simplifies
1939          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1940          */
1941         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1942                 if (do_tso || (m_head->m_next != NULL && 
1943                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1944                         if (M_WRITABLE(*m_headp) == 0) {
1945                                 m_head = m_dup(*m_headp, M_NOWAIT);
1946                                 m_freem(*m_headp);
1947                                 if (m_head == NULL) {
1948                                         *m_headp = NULL;
1949                                         return (ENOBUFS);
1950                                 }
1951                                 *m_headp = m_head;
1952                         }
1953                 }
1954                 /*
1955                  * XXX
1956                  * Assume IPv4, we don't have TSO/checksum offload support
1957                  * for IPv6 yet.
1958                  */
1959                 ip_off = sizeof(struct ether_header);
1960                 if (m_head->m_len < ip_off) {
1961                         m_head = m_pullup(m_head, ip_off);
1962                         if (m_head == NULL) {
1963                                 *m_headp = NULL;
1964                                 return (ENOBUFS);
1965                         }
1966                 }
1967                 eh = mtod(m_head, struct ether_header *);
1968                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1969                         ip_off = sizeof(struct ether_vlan_header);
1970                         if (m_head->m_len < ip_off) {
1971                                 m_head = m_pullup(m_head, ip_off);
1972                                 if (m_head == NULL) {
1973                                         *m_headp = NULL;
1974                                         return (ENOBUFS);
1975                                 }
1976                         }
1977                 }
1978                 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1979                         m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1980                         if (m_head == NULL) {
1981                                 *m_headp = NULL;
1982                                 return (ENOBUFS);
1983                         }
1984                 }
1985                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1986                 poff = ip_off + (ip->ip_hl << 2);
1987
1988                 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1989                         if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1990                                 m_head = m_pullup(m_head, poff +
1991                                     sizeof(struct tcphdr));
1992                                 if (m_head == NULL) {
1993                                         *m_headp = NULL;
1994                                         return (ENOBUFS);
1995                                 }
1996                         }
1997                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1998                         /*
1999                          * TSO workaround: pull TSO_WORKAROUND (4)
2000                          * more bytes of data into the header mbuf.
2001                          */
2002                         if (m_head->m_len < poff + (tp->th_off << 2)) {
2003                                 m_head = m_pullup(m_head, poff +
2004                                                  (tp->th_off << 2) +
2005                                                  TSO_WORKAROUND);
2006                                 if (m_head == NULL) {
2007                                         *m_headp = NULL;
2008                                         return (ENOBUFS);
2009                                 }
2010                         }
2011                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2012                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2013                         if (do_tso) {
2014                                 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2015                                                   (ip->ip_hl << 2) +
2016                                                   (tp->th_off << 2));
2017                                 ip->ip_sum = 0;
2018                                 /*
2019                                  * The pseudo TCP checksum the hardware
2020                                  * expects does not include the TCP
2021                                  * payload length, so the driver must
2022                                  * recompute it here.  This adheres to
2023                                  * Microsoft's Large Send specification.
2024                                  */
2025                                 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2026                                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
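                                 /*
                                  * in_pseudo() here sums only the source and
                                  * destination addresses and the protocol;
                                  * the hardware adds the per-segment length
                                  * during segmentation.
                                  */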
2027                         }
2028                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2029                         if (m_head->m_len < poff + sizeof(struct udphdr)) {
2030                                 m_head = m_pullup(m_head, poff +
2031                                     sizeof(struct udphdr));
2032                                 if (m_head == NULL) {
2033                                         *m_headp = NULL;
2034                                         return (ENOBUFS);
2035                                 }
2036                         }
2037                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2038                 }
2039                 *m_headp = m_head;
2040         }
2041
2042         /*
2043          * Map the packet for DMA
2044          *
2045          * Capture the first descriptor index,
2046          * this descriptor will have the index
2047          * of the EOP which is the only one that
2048          * now gets a DONE bit writeback.
2049          */
2050         first = txr->next_avail_desc;
2051         tx_buffer = &txr->tx_buffers[first];
2052         tx_buffer_mapped = tx_buffer;
2053         map = tx_buffer->map;
2054
2055 retry:
2056         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2057             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2058
2059         /*
2060          * There are two types of errors we can (try) to handle:
2061          * - EFBIG means the mbuf chain was too long and bus_dma ran
2062          *   out of segments.  Defragment the mbuf chain and try again.
2063          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2064          *   at this point in time.  Defer sending and try again later.
2065          * All other errors, in particular EINVAL, are fatal and prevent the
2066          * mbuf chain from ever going through.  Drop it and report error.
2067          */
2068         if (error == EFBIG && remap) {
2069                 struct mbuf *m;
2070
2071                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2072                 if (m == NULL) {
2073                         adapter->mbuf_defrag_failed++;
2074                         m_freem(*m_headp);
2075                         *m_headp = NULL;
2076                         return (ENOBUFS);
2077                 }
2078                 *m_headp = m;
2079
2080                 /* Try it again, but only once */
2081                 remap = FALSE;
2082                 goto retry;
2083         } else if (error != 0) {
2084                 adapter->no_tx_dma_setup++;
2085                 m_freem(*m_headp);
2086                 *m_headp = NULL;
2087                 return (error);
2088         }
2089
2090         /*
2091          * TSO Hardware workaround, if this packet is not
2092          * TSO, and is only a single descriptor long, and
2093          * it follows a TSO burst, then we need to add a
2094          * sentinel descriptor to prevent premature writeback.
2095          */
2096         if ((!do_tso) && (txr->tx_tso == TRUE)) {
2097                 if (nsegs == 1)
2098                         tso_desc = TRUE;
2099                 txr->tx_tso = FALSE;
2100         }
2101
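         /*
          * Require EM_MAX_SCATTER free descriptors beyond this packet;
          * this keeps headroom (e.g. for the TSO sentinel) and avoids
          * driving the ring completely full.
          */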
2102         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2103                 txr->no_desc_avail++;
2104                 bus_dmamap_unload(txr->txtag, map);
2105                 return (ENOBUFS);
2106         }
2107         m_head = *m_headp;
2108
2109         /* Do hardware assists */
2110         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2111                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2112                     &txd_upper, &txd_lower);
2113                 /* we need to make a final sentinel transmit desc */
2114                 tso_desc = TRUE;
2115         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2116                 em_transmit_checksum_setup(txr, m_head,
2117                     ip_off, ip, &txd_upper, &txd_lower);
2118
2119         if (m_head->m_flags & M_VLANTAG) {
2120                 /* Set the VLAN id in the descriptor's "special" field. */
2121                 txd_upper |= htole16(if_getvtag(m_head)) << 16;
2122                 /* Tell the hardware to insert the tag on transmit. */
2123                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2124         }
2125
2126         i = txr->next_avail_desc;
2127
2128         /* Set up our transmit descriptors */
2129         for (j = 0; j < nsegs; j++) {
2130                 bus_size_t seg_len;
2131                 bus_addr_t seg_addr;
2132
2133                 tx_buffer = &txr->tx_buffers[i];
2134                 ctxd = &txr->tx_base[i];
2135                 seg_addr = segs[j].ds_addr;
2136                 seg_len  = segs[j].ds_len;
2137                 /*
2138                 ** TSO Workaround:
2139                 ** If this is the last descriptor, we want to
2140                 ** split it so we have a small final sentinel
2141                 */
2142                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2143                         seg_len -= TSO_WORKAROUND;
2144                         ctxd->buffer_addr = htole64(seg_addr);
2145                         ctxd->lower.data = htole32(
2146                                 adapter->txd_cmd | txd_lower | seg_len);
2147                         ctxd->upper.data = htole32(txd_upper);
2148                         if (++i == adapter->num_tx_desc)
2149                                 i = 0;
2150
2151                         /* Now make the sentinel */     
2152                         txr->tx_avail--;
2153                         ctxd = &txr->tx_base[i];
2154                         tx_buffer = &txr->tx_buffers[i];
2155                         ctxd->buffer_addr =
2156                             htole64(seg_addr + seg_len);
2157                         ctxd->lower.data = htole32(
2158                             adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2159                         ctxd->upper.data =
2160                             htole32(txd_upper);
2161                         last = i;
2162                         if (++i == adapter->num_tx_desc)
2163                                 i = 0;
2164                 } else {
2165                         ctxd->buffer_addr = htole64(seg_addr);
2166                         ctxd->lower.data = htole32(
2167                             adapter->txd_cmd | txd_lower | seg_len);
2168                         ctxd->upper.data = htole32(txd_upper);
2169                         last = i;
2170                         if (++i == adapter->num_tx_desc)
2171                                 i = 0;
2172                 }
2173                 tx_buffer->m_head = NULL;
2174                 tx_buffer->next_eop = -1;
2175         }
2176
2177         txr->next_avail_desc = i;
2178         txr->tx_avail -= nsegs;
2179
2180         tx_buffer->m_head = m_head;
2181         /*
2182         ** Swap the maps so that the last descriptor, which
2183         ** gets the completion interrupt, has the real map,
2184         ** and the first descriptor takes the unused map
2185         ** from this buffer.
2186         */
2187         tx_buffer_mapped->map = tx_buffer->map;
2188         tx_buffer->map = map;
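         /*
          * PREWRITE ensures the packet data is visible to the device
          * (flushing caches or copying to bounce buffers) before the
          * TDT doorbell below hands it the descriptors.
          */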
2189         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2190
2191         /*
2192          * Last Descriptor of Packet
2193          * needs End Of Packet (EOP)
2194          * and Report Status (RS)
2195          */
2196         ctxd->lower.data |=
2197             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2198         /*
2199          * Keep track in the first buffer which
2200          * descriptor will be written back
2201          */
2202         tx_buffer = &txr->tx_buffers[first];
2203         tx_buffer->next_eop = last;
2204
2205         /*
2206          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2207          * that this frame is available to transmit.
2208          */
2209         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2210             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2211         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2212
2213         return (0);
2214 }
2215
2216 static void
2217 em_set_promisc(struct adapter *adapter)
2218 {
2219         if_t ifp = adapter->ifp;
2220         u32             reg_rctl;
2221
2222         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2223
2224         if (if_getflags(ifp) & IFF_PROMISC) {
2225                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2226                 /* Turn this on if you want to see bad packets */
2227                 if (em_debug_sbp)
2228                         reg_rctl |= E1000_RCTL_SBP;
2229                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2230         } else if (if_getflags(ifp) & IFF_ALLMULTI) {
2231                 reg_rctl |= E1000_RCTL_MPE;
2232                 reg_rctl &= ~E1000_RCTL_UPE;
2233                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2234         }
2235 }
2236
2237 static void
2238 em_disable_promisc(struct adapter *adapter)
2239 {
2240         if_t            ifp = adapter->ifp;
2241         u32             reg_rctl;
2242         int             mcnt = 0;
2243
2244         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2245         reg_rctl &= (~E1000_RCTL_UPE);
2246         if (if_getflags(ifp) & IFF_ALLMULTI)
2247                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2248         else
2249                 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2250         /* Don't disable if in MAX groups */
2251         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2252                 reg_rctl &= (~E1000_RCTL_MPE);
2253         reg_rctl &= (~E1000_RCTL_SBP);
2254         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2255 }
2256
2257
2258 /*********************************************************************
2259  *  Multicast Update
2260  *
2261  *  This routine is called whenever multicast address list is updated.
2262  *
2263  **********************************************************************/
2264
2265 static void
2266 em_set_multi(struct adapter *adapter)
2267 {
2268         if_t ifp = adapter->ifp;
2269         u32 reg_rctl = 0;
2270         u8  *mta; /* Multicast array memory */
2271         int mcnt = 0;
2272
2273         IOCTL_DEBUGOUT("em_set_multi: begin");
2274
2275         mta = adapter->mta;
2276         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2277
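         /*
          * Workaround for early 82542 (rev 2.0) silicon: hold the
          * receiver in reset (RCTL_RST) and disable MWI while the
          * multicast table array is rewritten, then restore both.
          */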
2278         if (adapter->hw.mac.type == e1000_82542 && 
2279             adapter->hw.revision_id == E1000_REVISION_2) {
2280                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2281                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2282                         e1000_pci_clear_mwi(&adapter->hw);
2283                 reg_rctl |= E1000_RCTL_RST;
2284                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2285                 msec_delay(5);
2286         }
2287
2288         if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2289
2290         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2291                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2292                 reg_rctl |= E1000_RCTL_MPE;
2293                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2294         } else
2295                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2296
2297         if (adapter->hw.mac.type == e1000_82542 && 
2298             adapter->hw.revision_id == E1000_REVISION_2) {
2299                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2300                 reg_rctl &= ~E1000_RCTL_RST;
2301                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2302                 msec_delay(5);
2303                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2304                         e1000_pci_set_mwi(&adapter->hw);
2305         }
2306 }
2307
2308
2309 /*********************************************************************
2310  *  Timer routine
2311  *
2312  *  This routine checks for link status and updates statistics.
2313  *
2314  **********************************************************************/
2315
2316 static void
2317 em_local_timer(void *arg)
2318 {
2319         struct adapter  *adapter = arg;
2320         if_t ifp = adapter->ifp;
2321         struct tx_ring  *txr = adapter->tx_rings;
2322         struct rx_ring  *rxr = adapter->rx_rings;
2323         u32             trigger = 0;
2324
2325         EM_CORE_LOCK_ASSERT(adapter);
2326
2327         em_update_link_status(adapter);
2328         em_update_stats_counters(adapter);
2329
2330         /* Reset LAA into RAR[0] on 82571 */
2331         if ((adapter->hw.mac.type == e1000_82571) &&
2332             e1000_get_laa_state_82571(&adapter->hw))
2333                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2334
2335         /* Mask to use in the irq trigger */
2336         if (adapter->msix_mem) {
2337                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2338                         trigger |= rxr->ims;
2339                 rxr = adapter->rx_rings;
2340         } else
2341                 trigger = E1000_ICS_RXDMT0;
2342
2343         /*
2344         ** Check the state of the TX queue(s).  This can be done
2345         ** without the lock because the check is read-only and
2346         ** the HUNG state, once set, is static.
2347         */
2348         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2349                 if (txr->busy == EM_TX_HUNG)
2350                         goto hung;
2351                 if (txr->busy >= EM_TX_MAXTRIES)
2352                         txr->busy = EM_TX_HUNG;
2353                 /* Schedule a TX tasklet if needed */
2354                 if (txr->tx_avail <= EM_MAX_SCATTER)
2355                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2356         }
2357         
2358         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2359 #ifndef DEVICE_POLLING
2360         /* Trigger an RX interrupt to guarantee mbuf refresh */
2361         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2362 #endif
2363         return;
2364 hung:
2365         /* Looks like we're hung */
2366         device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2367                         txr->me);
2368         em_print_debug_info(adapter);
2369         if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2370         adapter->watchdog_events++;
2371         em_init_locked(adapter);
2372 }
2373
2374
2375 static void
2376 em_update_link_status(struct adapter *adapter)
2377 {
2378         struct e1000_hw *hw = &adapter->hw;
2379         if_t ifp = adapter->ifp;
2380         device_t dev = adapter->dev;
2381         struct tx_ring *txr = adapter->tx_rings;
2382         u32 link_check = 0;
2383
2384         /* Get the cached link value or read phy for real */
2385         switch (hw->phy.media_type) {
2386         case e1000_media_type_copper:
2387                 if (hw->mac.get_link_status) {
2388                         if (hw->mac.type == e1000_pch_spt)
2389                                 msec_delay(50);
2390                         /* Do the work to read phy */
2391                         e1000_check_for_link(hw);
2392                         link_check = !hw->mac.get_link_status;
2393                         if (link_check) /* ESB2 fix */
2394                                 e1000_cfg_on_link_up(hw);
2395                 } else
2396                         link_check = TRUE;
2397                 break;
2398         case e1000_media_type_fiber:
2399                 e1000_check_for_link(hw);
2400                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2401                                  E1000_STATUS_LU);
2402                 break;
2403         case e1000_media_type_internal_serdes:
2404                 e1000_check_for_link(hw);
2405                 link_check = adapter->hw.mac.serdes_has_link;
2406                 break;
2407         default:
2408         case e1000_media_type_unknown:
2409                 break;
2410         }
2411
2412         /* Now check for a transition */
2413         if (link_check && (adapter->link_active == 0)) {
2414                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2415                     &adapter->link_duplex);
2416                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2417                 if ((adapter->link_speed != SPEED_1000) &&
2418                     ((hw->mac.type == e1000_82571) ||
2419                     (hw->mac.type == e1000_82572))) {
2420                         int tarc0;
2421                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2422                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2423                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2424                 }
2425                 if (bootverbose)
2426                         device_printf(dev, "Link is up %d Mbps %s\n",
2427                             adapter->link_speed,
2428                             ((adapter->link_duplex == FULL_DUPLEX) ?
2429                             "Full Duplex" : "Half Duplex"));
2430                 adapter->link_active = 1;
2431                 adapter->smartspeed = 0;
2432                 if_setbaudrate(ifp, adapter->link_speed * 1000000);
2433                 if_link_state_change(ifp, LINK_STATE_UP);
2434         } else if (!link_check && (adapter->link_active == 1)) {
2435                 if_setbaudrate(ifp, 0);
2436                 adapter->link_speed = 0;
2437                 adapter->link_duplex = 0;
2438                 if (bootverbose)
2439                         device_printf(dev, "Link is Down\n");
2440                 adapter->link_active = 0;
2441                 /* Link down, disable hang detection */
2442                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2443                         txr->busy = EM_TX_IDLE;
2444                 if_link_state_change(ifp, LINK_STATE_DOWN);
2445         }
2446 }
2447
2448 /*********************************************************************
2449  *
2450  *  This routine disables all traffic on the adapter by issuing a
2451  *  global reset on the MAC and deallocates TX/RX buffers.
2452  *
2453  *  This routine should always be called with BOTH the CORE
2454  *  and TX locks.
2455  **********************************************************************/
2456
2457 static void
2458 em_stop(void *arg)
2459 {
2460         struct adapter  *adapter = arg;
2461         if_t ifp = adapter->ifp;
2462         struct tx_ring  *txr = adapter->tx_rings;
2463
2464         EM_CORE_LOCK_ASSERT(adapter);
2465
2466         INIT_DEBUGOUT("em_stop: begin");
2467
2468         em_disable_intr(adapter);
2469         callout_stop(&adapter->timer);
2470
2471         /* Tell the stack that the interface is no longer active */
2472         if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2473
2474         /* Disarm Hang Detection. */
2475         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2476                 EM_TX_LOCK(txr);
2477                 txr->busy = EM_TX_IDLE;
2478                 EM_TX_UNLOCK(txr);
2479         }
2480
2481         /* I219 needs some special flushing to avoid hangs */
2482         if (adapter->hw.mac.type == e1000_pch_spt)
2483                 em_flush_desc_rings(adapter);
2484
2485         e1000_reset_hw(&adapter->hw);
2486         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2487
2488         e1000_led_off(&adapter->hw);
2489         e1000_cleanup_led(&adapter->hw);
2490 }
2491
2492
2493 /*********************************************************************
2494  *
2495  *  Determine hardware revision.
2496  *
2497  **********************************************************************/
2498 static void
2499 em_identify_hardware(struct adapter *adapter)
2500 {
2501         device_t dev = adapter->dev;
2502
2503         /* Make sure our PCI config space has the necessary stuff set */
2504         pci_enable_busmaster(dev);
2505         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2506
2507         /* Save off the information about this board */
2508         adapter->hw.vendor_id = pci_get_vendor(dev);
2509         adapter->hw.device_id = pci_get_device(dev);
2510         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2511         adapter->hw.subsystem_vendor_id =
2512             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2513         adapter->hw.subsystem_device_id =
2514             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2515
2516         /* Do Shared Code Init and Setup */
2517         if (e1000_set_mac_type(&adapter->hw)) {
2518                 device_printf(dev, "Setup init failure\n");
2519                 return;
2520         }
2521 }
2522
2523 static int
2524 em_allocate_pci_resources(struct adapter *adapter)
2525 {
2526         device_t        dev = adapter->dev;
2527         int             rid;
2528
2529         rid = PCIR_BAR(0);
2530         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2531             &rid, RF_ACTIVE);
2532         if (adapter->memory == NULL) {
2533                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2534                 return (ENXIO);
2535         }
2536         adapter->osdep.mem_bus_space_tag =
2537             rman_get_bustag(adapter->memory);
2538         adapter->osdep.mem_bus_space_handle =
2539             rman_get_bushandle(adapter->memory);
2540         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
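         /*
          * Register access on FreeBSD goes through the osdep
          * bus-space tag/handle saved above; hw_addr itself is
          * not dereferenced directly.
          */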
2541
2542         adapter->hw.back = &adapter->osdep;
2543
2544         return (0);
2545 }
2546
2547 /*********************************************************************
2548  *
2549  *  Setup the Legacy or MSI Interrupt handler
2550  *
2551  **********************************************************************/
2552 int
2553 em_allocate_legacy(struct adapter *adapter)
2554 {
2555         device_t dev = adapter->dev;
2556         struct tx_ring  *txr = adapter->tx_rings;
2557         int error, rid = 0;
2558
2559         /* Manually turn off all interrupts */
2560         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2561
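         /*
          * IRQ rid 0 is the legacy INTx resource; MSI vectors are
          * allocated starting at rid 1.
          */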
2562         if (adapter->msix == 1) /* using MSI */
2563                 rid = 1;
2564         /* We allocate a single interrupt resource */
2565         adapter->res = bus_alloc_resource_any(dev,
2566             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2567         if (adapter->res == NULL) {
2568                 device_printf(dev, "Unable to allocate bus resource: "
2569                     "interrupt\n");
2570                 return (ENXIO);
2571         }
2572
2573         /*
2574          * Allocate a fast interrupt and the associated
2575          * deferred processing contexts.
2576          */
2577         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2578         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2579             taskqueue_thread_enqueue, &adapter->tq);
2580         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2581             device_get_nameunit(adapter->dev));
2582         /* Use a TX only tasklet for local timer */
2583         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2584         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2585             taskqueue_thread_enqueue, &txr->tq);
2586         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2587             device_get_nameunit(adapter->dev));
2588         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2589         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2590             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2591                 device_printf(dev, "Failed to register fast interrupt "
2592                             "handler: %d\n", error);
2593                 taskqueue_free(adapter->tq);
2594                 adapter->tq = NULL;
2595                 return (error);
2596         }
2597         
2598         return (0);
2599 }
2600
2601 /*********************************************************************
2602  *
2603  *  Setup the MSIX Interrupt handlers
2604  *   This is not true multiqueue; rather, it is
2605  *   just separate interrupt vectors for TX, RX,
2606  *   and link.
2607  *
2608  **********************************************************************/
2609 int
2610 em_allocate_msix(struct adapter *adapter)
2611 {
2612         device_t        dev = adapter->dev;
2613         struct          tx_ring *txr = adapter->tx_rings;
2614         struct          rx_ring *rxr = adapter->rx_rings;
2615         int             error, rid, vector = 0;
2616         int             cpu_id = 0;
2617
2618
2619         /* Make sure all interrupts are disabled */
2620         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2621
2622         /* First set up ring resources */
2623         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2624
2625                 /* RX ring */
2626                 rid = vector + 1;
2627
2628                 rxr->res = bus_alloc_resource_any(dev,
2629                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2630                 if (rxr->res == NULL) {
2631                         device_printf(dev,
2632                             "Unable to allocate bus resource: "
2633                             "RX MSIX Interrupt %d\n", i);
2634                         return (ENXIO);
2635                 }
2636                 if ((error = bus_setup_intr(dev, rxr->res,
2637                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2638                     rxr, &rxr->tag)) != 0) {
2639                         device_printf(dev, "Failed to register RX handler");
2640                         return (error);
2641                 }
2642 #if __FreeBSD_version >= 800504
2643                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2644 #endif
2645                 rxr->msix = vector;
2646
2647                 if (em_last_bind_cpu < 0)
2648                         em_last_bind_cpu = CPU_FIRST();
2649                 cpu_id = em_last_bind_cpu;
2650                 bus_bind_intr(dev, rxr->res, cpu_id);
2651
2652                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2653                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2654                     taskqueue_thread_enqueue, &rxr->tq);
2655                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2656                     device_get_nameunit(adapter->dev), cpu_id);
2657                 /*
2658                 ** Set the bit to enable interrupt
2659                 ** in E1000_IMS -- bits 20 and 21
2660                 ** are for RX0 and RX1, note this has
2661                 ** NOTHING to do with the MSIX vector
2662                 */
2663                 rxr->ims = 1 << (20 + i);
2664                 adapter->ims |= rxr->ims;
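                 /*
                  * Each 4-bit IVAR field holds the vector number in its
                  * low 3 bits plus a valid bit (the 8).  RX queue fields
                  * start at bit 0, TX fields at bit 8, and the link
                  * (other-cause) field at bit 16 (82574 datasheet).
                  */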
2665                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2666
2667                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2668         }
2669
2670         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2671                 /* TX ring */
2672                 rid = vector + 1;
2673                 txr->res = bus_alloc_resource_any(dev,
2674                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2675                 if (txr->res == NULL) {
2676                         device_printf(dev,
2677                             "Unable to allocate bus resource: "
2678                             "TX MSIX Interrupt %d\n", i);
2679                         return (ENXIO);
2680                 }
2681                 if ((error = bus_setup_intr(dev, txr->res,
2682                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2683                     txr, &txr->tag)) != 0) {
2684                         device_printf(dev, "Failed to register TX handler");
2685                         return (error);
2686                 }
2687 #if __FreeBSD_version >= 800504
2688                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2689 #endif
2690                 txr->msix = vector;
2691
2692                 if (em_last_bind_cpu < 0)
2693                         em_last_bind_cpu = CPU_FIRST();
2694                 cpu_id = em_last_bind_cpu;
2695                 bus_bind_intr(dev, txr->res, cpu_id);
2696
2697                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2698                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2699                     taskqueue_thread_enqueue, &txr->tq);
2700                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2701                     device_get_nameunit(adapter->dev), cpu_id);
2702                 /*
2703                 ** Set the bit to enable interrupt
2704                 ** in E1000_IMS -- bits 22 and 23
2705                 ** are for TX0 and TX1, note this has
2706                 ** NOTHING to do with the MSIX vector
2707                 */
2708                 txr->ims = 1 << (22 + i);
2709                 adapter->ims |= txr->ims;
2710                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2711
2712                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2713         }
2714
2715         /* Link interrupt */
2716         rid = vector + 1;
2717         adapter->res = bus_alloc_resource_any(dev,
2718             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2719         if (!adapter->res) {
2720                 device_printf(dev, "Unable to allocate "
2721                     "bus resource: Link interrupt [%d]\n", rid);
2722                 return (ENXIO);
2723         }
2724         /* Set the link handler function */
2725         error = bus_setup_intr(dev, adapter->res,
2726             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2727             em_msix_link, adapter, &adapter->tag);
2728         if (error) {
2729                 adapter->res = NULL;
2730                 device_printf(dev, "Failed to register LINK handler\n");
2731                 return (error);
2732         }
2733 #if __FreeBSD_version >= 800504
2734         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2735 #endif
2736         adapter->linkvec = vector;
2737         adapter->ivars |= (8 | vector) << 16;
2738         adapter->ivars |= 0x80000000;
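             /*
             ** adapter->ivars now holds the 82574 IVAR image: each 4-bit
             ** field is a vector number plus a valid bit (0x8) -- RX
             ** queues in bits 3:0 and 7:4, TX queues in 11:8 and 15:12,
             ** and the link/other cause in 19:16. Bit 31 appears to
             ** request a TX interrupt on every write-back (per the 82574
             ** datasheet); the register itself is written later, during
             ** device initialization, when MSI-X is active.
             */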
2739
2740         return (0);
2741 }
2742
2743
2744 static void
2745 em_free_pci_resources(struct adapter *adapter)
2746 {
2747         device_t        dev = adapter->dev;
2748         struct tx_ring  *txr;
2749         struct rx_ring  *rxr;
2750         int             rid;
2751
2752
2753         /*
2754         ** Release all the queue interrupt resources:
2755         */
2756         for (int i = 0; i < adapter->num_queues; i++) {
2757                 txr = &adapter->tx_rings[i];
2758                 /* an early abort? */
2759                 if (txr == NULL)
2760                         break;
2761                 rid = txr->msix + 1;
2762                 if (txr->tag != NULL) {
2763                         bus_teardown_intr(dev, txr->res, txr->tag);
2764                         txr->tag = NULL;
2765                 }
2766                 if (txr->res != NULL)
2767                         bus_release_resource(dev, SYS_RES_IRQ,
2768                             rid, txr->res);
2769
2770                 rxr = &adapter->rx_rings[i];
2771                 /* an early abort? */
2772                 if (rxr == NULL)
2773                         break;
2774                 rid = rxr->msix + 1;
2775                 if (rxr->tag != NULL) {
2776                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2777                         rxr->tag = NULL;
2778                 }
2779                 if (rxr->res != NULL)
2780                         bus_release_resource(dev, SYS_RES_IRQ,
2781                             rid, rxr->res);
2782         }
2783
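             /*
             ** Select the rid for the link/shared interrupt: under
             ** MSI-X it is linkvec + 1, under plain MSI it is 1, and a
             ** legacy INTx line uses rid 0.
             */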
2784         if (adapter->linkvec) /* we are doing MSIX */
2785                 rid = adapter->linkvec + 1;
2786         else
2787                 rid = (adapter->msix != 0) ? 1 : 0;
2788
2789         if (adapter->tag != NULL) {
2790                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2791                 adapter->tag = NULL;
2792         }
2793
2794         if (adapter->res != NULL)
2795                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2796
2797
2798         if (adapter->msix)
2799                 pci_release_msi(dev);
2800
2801         if (adapter->msix_mem != NULL)
2802                 bus_release_resource(dev, SYS_RES_MEMORY,
2803                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2804
2805         if (adapter->memory != NULL)
2806                 bus_release_resource(dev, SYS_RES_MEMORY,
2807                     PCIR_BAR(0), adapter->memory);
2808
2809         if (adapter->flash != NULL)
2810                 bus_release_resource(dev, SYS_RES_MEMORY,
2811                     EM_FLASH, adapter->flash);
2812 }
2813
2814 /*
2815  * Setup MSI or MSI/X
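      *
      * Returns the number of vectors allocated: 5 or 3 under MSI-X,
      * 1 under MSI, or 0 to fall back to a legacy IRQ.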
2816  */
2817 static int
2818 em_setup_msix(struct adapter *adapter)
2819 {
2820         device_t dev = adapter->dev;
2821         int val;
2822
2823         /* Nearly always going to use one queue */
2824         adapter->num_queues = 1;
2825
2826         /*
2827         ** Try using MSI-X for Hartwell adapters
2828         */
2829         if ((adapter->hw.mac.type == e1000_82574) &&
2830             (em_enable_msix == TRUE)) {
2831 #ifdef EM_MULTIQUEUE
2832                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2833                 if (adapter->num_queues > 1)
2834                         em_enable_vectors_82574(adapter);
2835 #endif
2836                 /* Map the MSIX BAR */
2837                 int rid = PCIR_BAR(EM_MSIX_BAR);
2838                 adapter->msix_mem = bus_alloc_resource_any(dev,
2839                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2840                 if (adapter->msix_mem == NULL) {
2841                         /* May not be enabled */
2842                         device_printf(adapter->dev,
2843                             "Unable to map MSIX table\n");
2844                         goto msi;
2845                 }
2846                 val = pci_msix_count(dev); 
2847
2848 #ifdef EM_MULTIQUEUE
2849                 /* We need 5 vectors in the multiqueue case: 2 RX, 2 TX, plus link */
2850                 if (adapter->num_queues > 1) {
2851                         if (val >= 5)
2852                                 val = 5;
2853                         else {
2854                                 adapter->num_queues = 1;
2855                                 device_printf(adapter->dev,
2856                                     "Insufficient MSIX vectors for >1 queue, "
2857                                     "using single queue...\n");
2858                                 goto msix_one;
2859                         }
2860                 } else {
2861 msix_one:
2862 #endif
2863                         if (val >= 3)
2864                                 val = 3;
2865                         else {
2866                                 device_printf(adapter->dev,
2867                                 "Insufficient MSIX vectors, using MSI\n");
2868                                 goto msi;
2869                         }
2870 #ifdef EM_MULTIQUEUE
2871                 }
2872 #endif
2873
2874                 if ((pci_alloc_msix(dev, &val) == 0)) {
2875                         device_printf(adapter->dev,
2876                             "Using MSIX interrupts "
2877                             "with %d vectors\n", val);
2878                         return (val);
2879                 }
2880
2881                 /*
2882                 ** If MSIX alloc failed or provided us with
2883                 ** less than needed, free and fall through to MSI
2884                 */
2885                 pci_release_msi(dev);
2886         }
2887 msi:
2888         if (adapter->msix_mem != NULL) {
2889                 bus_release_resource(dev, SYS_RES_MEMORY,
2890                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2891                 adapter->msix_mem = NULL;
2892         }
2893         val = 1;
2894         if (pci_alloc_msi(dev, &val) == 0) {
2895                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2896                 return (val);
2897         } 
2898         /* Should only happen due to manual configuration */
2899         device_printf(adapter->dev, "No MSI/MSI-X available, using a legacy IRQ\n");
2900         return (0);
2901 }
2902
2903
2904 /*
2905 ** The three following flush routines are used as a workaround for the
2906 ** I219 client parts, and only for them.
2907 **
2908 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2909 **
2910 ** We want to clear all pending descriptors from the TX ring; the
2911 ** actual zeroing happens when the HW reads them back. We use the
2912 ** ring itself as the data buffer of a dummy descriptor, and don't
2913 ** care about its contents since we are about to reset the HW anyway.
2914 */
2915 static void
2916 em_flush_tx_ring(struct adapter *adapter)
2917 {
2918         struct e1000_hw         *hw = &adapter->hw;
2919         struct tx_ring          *txr = adapter->tx_rings;
2920         struct e1000_tx_desc    *txd;
2921         u32                     tctl, txd_lower = E1000_TXD_CMD_IFCS;
2922         u16                     size = 512;
2923
2924         tctl = E1000_READ_REG(hw, E1000_TCTL);
2925         E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2926
2927         txd = &txr->tx_base[txr->next_avail_desc++];
2928         if (txr->next_avail_desc == adapter->num_tx_desc)
2929                 txr->next_avail_desc = 0;
2930
2931         /* Just use the ring as a dummy buffer addr */
2932         txd->buffer_addr = txr->txdma.dma_paddr;
2933         txd->lower.data = htole32(txd_lower | size);
2934         txd->upper.data = 0;
2935
2936         /* flush descriptors to memory before notifying the HW */
2937         wmb();
2938
2939         E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2940         mb();
2941         usec_delay(250);
2942 }
2943
2944 /*
2945 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2946 **
2947 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2948 */
2949 static void
2950 em_flush_rx_ring(struct adapter *adapter)
2951 {
2952         struct e1000_hw *hw = &adapter->hw;
2953         u32             rctl, rxdctl;
2954
2955         rctl = E1000_READ_REG(hw, E1000_RCTL);
2956         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2957         E1000_WRITE_FLUSH(hw);
2958         usec_delay(150);
2959
2960         rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2961         /* zero the lower 14 bits (prefetch and host thresholds) */
2962         rxdctl &= 0xffffc000;
2963         /*
2964          * update thresholds: prefetch threshold to 31, host threshold to 1
2965          * and make sure the granularity is "descriptors" and not "cache lines"
2966          */
2967         rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2968         E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2969
2970         /* momentarily enable the RX ring for the changes to take effect */
2971         E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2972         E1000_WRITE_FLUSH(hw);
2973         usec_delay(150);
2974         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2975 }
2976
2977 /*
2978 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
2979 **
2980 ** In i219, the descriptor rings must be emptied before resetting the HW
2981 ** or before changing the device state to D3 during runtime (runtime PM).
2982 **
2983 ** Failure to do this will cause the HW to enter a unit hang state which can
2984 ** only be released by PCI reset on the device
2985 **
2986 */
2987 static void
2988 em_flush_desc_rings(struct adapter *adapter)
2989 {
2990         struct e1000_hw *hw = &adapter->hw;
2991         device_t        dev = adapter->dev;
2992         u16             hang_state;
2993         u32             fext_nvm11, tdlen;
2994  
2995         /* First, disable MULR fix in FEXTNVM11 */
2996         fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2997         fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2998         E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2999         
3000         /* do nothing if we're not in faulty state, or if the queue is empty */
3001         tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3002         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3003         if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3004                 return;
3005         em_flush_tx_ring(adapter);
3006
3007         /* recheck, maybe the fault is caused by the rx ring */
3008         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3009         if (hang_state & FLUSH_DESC_REQUIRED)
3010                 em_flush_rx_ring(adapter);
3011 }
3012
3013
3014 /*********************************************************************
3015  *
3016  *  Initialize the hardware to a configuration
3017  *  as specified by the adapter structure.
3018  *
3019  **********************************************************************/
3020 static void
3021 em_reset(struct adapter *adapter)
3022 {
3023         device_t        dev = adapter->dev;
3024         if_t ifp = adapter->ifp;
3025         struct e1000_hw *hw = &adapter->hw;
3026         u16             rx_buffer_size;
3027         u32             pba;
3028
3029         INIT_DEBUGOUT("em_reset: begin");
3030
3031         /* Set up smart power down as default off on newer adapters. */
3032         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3033             hw->mac.type == e1000_82572)) {
3034                 u16 phy_tmp = 0;
3035
3036                 /* Speed up time to link by disabling smart power down. */
3037                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3038                 phy_tmp &= ~IGP02E1000_PM_SPD;
3039                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3040         }
3041
3042         /*
3043          * Packet Buffer Allocation (PBA)
3044          * Writing PBA sets the receive portion of the buffer;
3045          * the remainder is used for the transmit buffer.
3046          */
3047         switch (hw->mac.type) {
3048         /* Total Packet Buffer on these is 48K */
3049         case e1000_82571:
3050         case e1000_82572:
3051         case e1000_80003es2lan:
3052                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3053                 break;
3054         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3055                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3056                 break;
3057         case e1000_82574:
3058         case e1000_82583:
3059                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3060                 break;
3061         case e1000_ich8lan:
3062                 pba = E1000_PBA_8K;
3063                 break;
3064         case e1000_ich9lan:
3065         case e1000_ich10lan:
3066                 /* Boost Receive side for jumbo frames */
3067                 if (adapter->hw.mac.max_frame_size > 4096)
3068                         pba = E1000_PBA_14K;
3069                 else
3070                         pba = E1000_PBA_10K;
3071                 break;
3072         case e1000_pchlan:
3073         case e1000_pch2lan:
3074         case e1000_pch_lpt:
3075         case e1000_pch_spt:
3076                 pba = E1000_PBA_26K;
3077                 break;
3078         default:
3079                 if (adapter->hw.mac.max_frame_size > 8192)
3080                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3081                 else
3082                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3083         }
3084         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3085
3086         /*
3087          * These parameters control the automatic generation (Tx) and
3088          * response (Rx) to Ethernet PAUSE frames.
3089          * - High water mark should allow for at least two frames to be
3090          *   received after sending an XOFF.
3091          * - Low water mark works best when it is very near the high water mark.
3092          *   This allows the receiver to restart by sending XON when it has
3093          *   drained a bit. Here we use an arbitrary value of 1500 which will
3094          *   restart after one full frame is pulled from the buffer. There
3095          *   could be several smaller frames in the buffer and if so they will
3096          *   not trigger the XON until their total number reduces the buffer
3097          *   by 1500.
3098          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3099          */
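             /* PBA's low 16 bits report the RX allocation in KB; shift by 10 for bytes. */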
3100         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3101         hw->fc.high_water = rx_buffer_size -
3102             roundup2(adapter->hw.mac.max_frame_size, 1024);
3103         hw->fc.low_water = hw->fc.high_water - 1500;
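             /*
              * Example with illustrative values: a 48K RX allocation and
              * a 1522-byte max frame give rx_buffer_size = 49152, so
              * high_water = 49152 - roundup2(1522, 1024) = 47104 and
              * low_water = 47104 - 1500 = 45604.
              */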
3104
3105         if (adapter->fc) /* locally set flow control value? */
3106                 hw->fc.requested_mode = adapter->fc;
3107         else
3108                 hw->fc.requested_mode = e1000_fc_full;
3109
3110         if (hw->mac.type == e1000_80003es2lan)
3111                 hw->fc.pause_time = 0xFFFF;
3112         else
3113                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3114
3115         hw->fc.send_xon = TRUE;
3116
3117         /* Device specific overrides/settings */
3118         switch (hw->mac.type) {
3119         case e1000_pchlan:
3120                 /* Workaround: no TX flow ctrl for PCH */
3121                 hw->fc.requested_mode = e1000_fc_rx_pause;
3122                 hw->fc.pause_time = 0xFFFF; /* override */
3123                 if (if_getmtu(ifp) > ETHERMTU) {
3124                         hw->fc.high_water = 0x3500;
3125                         hw->fc.low_water = 0x1500;
3126                 } else {
3127                         hw->fc.high_water = 0x5000;
3128                         hw->fc.low_water = 0x3000;
3129                 }
3130                 hw->fc.refresh_time = 0x1000;
3131                 break;
3132         case e1000_pch2lan:
3133         case e1000_pch_lpt:
3134         case e1000_pch_spt:
3135                 hw->fc.high_water = 0x5C20;
3136                 hw->fc.low_water = 0x5048;
3137                 hw->fc.pause_time = 0x0650;
3138                 hw->fc.refresh_time = 0x0400;
3139                 /* Jumbos need adjusted PBA */
3140                 if (if_getmtu(ifp) > ETHERMTU)
3141                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3142                 else
3143                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3144                 break;
3145         case e1000_ich9lan:
3146         case e1000_ich10lan:
3147                 if (if_getmtu(ifp) > ETHERMTU) {
3148                         hw->fc.high_water = 0x2800;
3149                         hw->fc.low_water = hw->fc.high_water - 8;
3150                         break;
3151                 } 
3152                 /* else fall thru */
3153         default:
3154                 if (hw->mac.type == e1000_80003es2lan)
3155                         hw->fc.pause_time = 0xFFFF;
3156                 break;
3157         }
3158
3159         /* I219 needs some special flushing to avoid hangs */
3160         if (hw->mac.type == e1000_pch_spt)
3161                 em_flush_desc_rings(adapter);
3162
3163         /* Issue a global reset */
3164         e1000_reset_hw(hw);
3165         E1000_WRITE_REG(hw, E1000_WUC, 0);
3166         em_disable_aspm(adapter);
3167         /* and a re-init */
3168         if (e1000_init_hw(hw) < 0) {
3169                 device_printf(dev, "Hardware Initialization Failed\n");
3170                 return;
3171         }
3172
3173         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3174         e1000_get_phy_info(hw);
3175         e1000_check_for_link(hw);
3176         return;
3177 }
3178
3179 /*********************************************************************
3180  *
3181  *  Setup networking device structure and register an interface.
3182  *
3183  **********************************************************************/
3184 static int
3185 em_setup_interface(device_t dev, struct adapter *adapter)
3186 {
3187         if_t ifp;
3188
3189         INIT_DEBUGOUT("em_setup_interface: begin");
3190
3191         ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3192         if (ifp == NULL) {
3193                 device_printf(dev, "cannot allocate ifnet structure\n");
3194                 return (-1);
3195         }
3196         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3197         if_setdev(ifp, dev);
3198         if_setinitfn(ifp, em_init);
3199         if_setsoftc(ifp, adapter);
3200         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3201         if_setioctlfn(ifp, em_ioctl);
3202         if_setgetcounterfn(ifp, em_get_counter);
3203
3204         /* TSO parameters */
3205         ifp->if_hw_tsomax = IP_MAXPACKET;
3206         /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3207         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3208         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3209
3210 #ifdef EM_MULTIQUEUE
3211         /* Multiqueue stack interface */
3212         if_settransmitfn(ifp, em_mq_start);
3213         if_setqflushfn(ifp, em_qflush);
3214 #else
3215         if_setstartfn(ifp, em_start);
3216         if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3217         if_setsendqready(ifp);
3218 #endif  
3219
3220         ether_ifattach(ifp, adapter->hw.mac.addr);
3221
3222         if_setcapabilities(ifp, 0);
3223         if_setcapenable(ifp, 0);
3224
3225
3226         if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3227             IFCAP_TSO4, 0);
3228         /*
3229          * Tell the upper layer(s) we
3230          * support full VLAN capability
3231          */
3232         if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3233         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3234             IFCAP_VLAN_MTU, 0);
3235         if_setcapenable(ifp, if_getcapabilities(ifp));
3236
3237         /*
3238         ** Don't turn this on by default: if vlans are
3239         ** created on another pseudo device (e.g. lagg),
3240         ** vlan events are not passed through and operation
3241         ** breaks, whereas with HW FILTER off it works. If
3242         ** you use vlans directly on the em driver you can
3243         ** enable this to get full hardware tag filtering.
3244         */
3245         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
3246
3247 #ifdef DEVICE_POLLING
3248         if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3249 #endif
3250
3251         /* Enable only WOL MAGIC by default */
3252         if (adapter->wol) {
3253                 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3254                 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3255         }
3256                 
3257         /*
3258          * Specify the media types supported by this adapter and register
3259          * callbacks to update media and link information
3260          */
3261         ifmedia_init(&adapter->media, IFM_IMASK,
3262             em_media_change, em_media_status);
3263         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3264             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3265                 u_char fiber_type = IFM_1000_SX;        /* default type */
3266
3267                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3268                             0, NULL);
3269                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3270         } else {
3271                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3272                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3273                             0, NULL);
3274                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3275                             0, NULL);
3276                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3277                             0, NULL);
3278                 if (adapter->hw.phy.type != e1000_phy_ife) {
3279                         ifmedia_add(&adapter->media,
3280                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3281                         ifmedia_add(&adapter->media,
3282                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3283                 }
3284         }
3285         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3286         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3287         return (0);
3288 }
3289
3290
3291 /*
3292  * Manage DMA'able memory.
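      *
      * em_dmamap_cb is the bus_dmamap_load() callback: it records the
      * physical address of the lone DMA segment (em_dma_malloc creates
      * its tag with nsegments == 1, so only segs[0] is meaningful).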
3293  */
3294 static void
3295 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3296 {
3297         if (error)
3298                 return;
3299         *(bus_addr_t *) arg = segs[0].ds_addr;
3300 }
3301
3302 static int
3303 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3304         struct em_dma_alloc *dma, int mapflags)
3305 {
3306         int error;
3307
3308         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3309                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3310                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3311                                 BUS_SPACE_MAXADDR,      /* highaddr */
3312                                 NULL, NULL,             /* filter, filterarg */
3313                                 size,                   /* maxsize */
3314                                 1,                      /* nsegments */
3315                                 size,                   /* maxsegsize */
3316                                 0,                      /* flags */
3317                                 NULL,                   /* lockfunc */
3318                                 NULL,                   /* lockarg */
3319                                 &dma->dma_tag);
3320         if (error) {
3321                 device_printf(adapter->dev,
3322                     "%s: bus_dma_tag_create failed: %d\n",
3323                     __func__, error);
3324                 goto fail_0;
3325         }
3326
3327         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3328             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3329         if (error) {
3330                 device_printf(adapter->dev,
3331                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3332                     __func__, (uintmax_t)size, error);
3333                 goto fail_2;
3334         }
3335
3336         dma->dma_paddr = 0;
3337         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3338             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3339         if (error || dma->dma_paddr == 0) {
3340                 device_printf(adapter->dev,
3341                     "%s: bus_dmamap_load failed: %d\n",
3342                     __func__, error);
3343                 goto fail_3;
3344         }
3345
3346         return (0);
3347
3348 fail_3:
3349         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3350 fail_2:
3351         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3352         bus_dma_tag_destroy(dma->dma_tag);
3353 fail_0:
3354         dma->dma_tag = NULL;
3355
3356         return (error);
3357 }
3358
3359 static void
3360 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3361 {
3362         if (dma->dma_tag == NULL)
3363                 return;
3364         if (dma->dma_paddr != 0) {
3365                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3366                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3367                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3368                 dma->dma_paddr = 0;
3369         }
3370         if (dma->dma_vaddr != NULL) {
3371                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3372                 dma->dma_vaddr = NULL;
3373         }
3374         bus_dma_tag_destroy(dma->dma_tag);
3375         dma->dma_tag = NULL;
3376 }
3377
3378
3379 /*********************************************************************
3380  *
3381  *  Allocate memory for the transmit and receive rings, and then
3382  *  the descriptors associated with each, called only once at attach.
3383  *
3384  **********************************************************************/
3385 static int
3386 em_allocate_queues(struct adapter *adapter)
3387 {
3388         device_t                dev = adapter->dev;
3389         struct tx_ring          *txr = NULL;
3390         struct rx_ring          *rxr = NULL;
3391         int rsize, tsize, error = E1000_SUCCESS;
3392         int txconf = 0, rxconf = 0;
3393
3394
3395         /* Allocate the TX ring struct memory */
3396         if (!(adapter->tx_rings =
3397             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3398             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3399                 device_printf(dev, "Unable to allocate TX ring memory\n");
3400                 error = ENOMEM;
3401                 goto fail;
3402         }
3403
3404         /* Now allocate the RX */
3405         if (!(adapter->rx_rings =
3406             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3407             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3408                 device_printf(dev, "Unable to allocate RX ring memory\n");
3409                 error = ENOMEM;
3410                 goto rx_fail;
3411         }
3412
3413         tsize = roundup2(adapter->num_tx_desc *
3414             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
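             /*
              * Sizing example (illustrative only): 1024 TX descriptors
              * at 16 bytes each (sizeof(struct e1000_tx_desc)) come to
              * 16384 bytes, already a multiple of EM_DBA_ALIGN, so
              * roundup2() leaves the size unchanged.
              */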
3415         /*
3416          * Now set up the TX queues, txconf is needed to handle the
3417          * possibility that things fail midcourse and we need to
3418          * undo memory gracefully
3419          */ 
3420         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3421                 /* Set up some basics */
3422                 txr = &adapter->tx_rings[i];
3423                 txr->adapter = adapter;
3424                 txr->me = i;
3425
3426                 /* Initialize the TX lock */
3427                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3428                     device_get_nameunit(dev), txr->me);
3429                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3430
3431                 if (em_dma_malloc(adapter, tsize,
3432                         &txr->txdma, BUS_DMA_NOWAIT)) {
3433                         device_printf(dev,
3434                             "Unable to allocate TX Descriptor memory\n");
3435                         error = ENOMEM;
3436                         goto err_tx_desc;
3437                 }
3438                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3439                 bzero((void *)txr->tx_base, tsize);
3440
3441                 if (em_allocate_transmit_buffers(txr)) {
3442                         device_printf(dev,
3443                             "Critical Failure setting up transmit buffers\n");
3444                         error = ENOMEM;
3445                         goto err_tx_desc;
3446                 }
3447 #if __FreeBSD_version >= 800000
3448                 /* Allocate a buf ring */
3449                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3450                     M_WAITOK, &txr->tx_mtx);
3451 #endif
3452         }
3453
3454         /*
3455          * Next the RX queues...
3456          */ 
3457         rsize = roundup2(adapter->num_rx_desc *
3458             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3459         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3460                 rxr = &adapter->rx_rings[i];
3461                 rxr->adapter = adapter;
3462                 rxr->me = i;
3463
3464                 /* Initialize the RX lock */
3465                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3466                     device_get_nameunit(dev), rxr->me);
3467                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3468
3469                 if (em_dma_malloc(adapter, rsize,
3470                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3471                         device_printf(dev,
3472                             "Unable to allocate RX Descriptor memory\n");
3473                         error = ENOMEM;
3474                         goto err_rx_desc;
3475                 }
3476                 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3477                 bzero((void *)rxr->rx_base, rsize);
3478
3479                 /* Allocate receive buffers for the ring*/
3480                 if (em_allocate_receive_buffers(rxr)) {
3481                         device_printf(dev,
3482                             "Critical Failure setting up receive buffers\n");
3483                         error = ENOMEM;
3484                         goto err_rx_desc;
3485                 }
3486         }
3487
3488         return (0);
3489
3490 err_rx_desc:
3491         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3492                 em_dma_free(adapter, &rxr->rxdma);
3493 err_tx_desc:
3494         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3495                 em_dma_free(adapter, &txr->txdma);
3496         free(adapter->rx_rings, M_DEVBUF);
3497 rx_fail:
3498 #if __FreeBSD_version >= 800000
3499         buf_ring_free(txr->br, M_DEVBUF);
3500 #endif
3501         free(adapter->tx_rings, M_DEVBUF);
3502 fail:
3503         return (error);
3504 }
3505
3506
3507 /*********************************************************************
3508  *
3509  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3510  *  the information needed to transmit a packet on the wire. This is
3511  *  called only once at attach, setup is done every reset.
3512  *
3513  **********************************************************************/
3514 static int
3515 em_allocate_transmit_buffers(struct tx_ring *txr)
3516 {
3517         struct adapter *adapter = txr->adapter;
3518         device_t dev = adapter->dev;
3519         struct em_txbuffer *txbuf;
3520         int error, i;
3521
3522         /*
3523          * Setup DMA descriptor areas.
3524          */
3525         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3526                                1, 0,                    /* alignment, bounds */
3527                                BUS_SPACE_MAXADDR,       /* lowaddr */
3528                                BUS_SPACE_MAXADDR,       /* highaddr */
3529                                NULL, NULL,              /* filter, filterarg */
3530                                EM_TSO_SIZE,             /* maxsize */
3531                                EM_MAX_SCATTER,          /* nsegments */
3532                                PAGE_SIZE,               /* maxsegsize */
3533                                0,                       /* flags */
3534                                NULL,                    /* lockfunc */
3535                                NULL,                    /* lockfuncarg */
3536                                &txr->txtag))) {
3537                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3538                 goto fail;
3539         }
3540
3541         if (!(txr->tx_buffers =
3542             (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3543             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3544                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3545                 error = ENOMEM;
3546                 goto fail;
3547         }
3548
3549         /* Create the descriptor buffer dma maps */
3550         txbuf = txr->tx_buffers;
3551         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3552                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3553                 if (error != 0) {
3554                         device_printf(dev, "Unable to create TX DMA map\n");
3555                         goto fail;
3556                 }
3557         }
3558
3559         return 0;
3560 fail:
3561         /* We free all, it handles case where we are in the middle */
3562         em_free_transmit_structures(adapter);
3563         return (error);
3564 }
3565
3566 /*********************************************************************
3567  *
3568  *  Initialize a transmit ring.
3569  *
3570  **********************************************************************/
3571 static void
3572 em_setup_transmit_ring(struct tx_ring *txr)
3573 {
3574         struct adapter *adapter = txr->adapter;
3575         struct em_txbuffer *txbuf;
3576         int i;
3577 #ifdef DEV_NETMAP
3578         struct netmap_slot *slot;
3579         struct netmap_adapter *na = netmap_getna(adapter->ifp);
3580 #endif /* DEV_NETMAP */
3581
3582         /* Clear the old descriptor contents */
3583         EM_TX_LOCK(txr);
3584 #ifdef DEV_NETMAP
3585         slot = netmap_reset(na, NR_TX, txr->me, 0);
3586 #endif /* DEV_NETMAP */
3587
3588         bzero((void *)txr->tx_base,
3589               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3590         /* Reset indices */
3591         txr->next_avail_desc = 0;
3592         txr->next_to_clean = 0;
3593
3594         /* Free any existing tx buffers. */
3595         txbuf = txr->tx_buffers;
3596         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3597                 if (txbuf->m_head != NULL) {
3598                         bus_dmamap_sync(txr->txtag, txbuf->map,
3599                             BUS_DMASYNC_POSTWRITE);
3600                         bus_dmamap_unload(txr->txtag, txbuf->map);
3601                         m_freem(txbuf->m_head);
3602                         txbuf->m_head = NULL;
3603                 }
3604 #ifdef DEV_NETMAP
3605                 if (slot) {
3606                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3607                         uint64_t paddr;
3608                         void *addr;
3609
3610                         addr = PNMB(na, slot + si, &paddr);
3611                         txr->tx_base[i].buffer_addr = htole64(paddr);
3612                         /* reload the map for netmap mode */
3613                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3614                 }
3615 #endif /* DEV_NETMAP */
3616
3617                 /* clear the watch index */
3618                 txbuf->next_eop = -1;
3619         }
3620
3621         /* Set number of descriptors available */
3622         txr->tx_avail = adapter->num_tx_desc;
3623         txr->busy = EM_TX_IDLE;
3624
3625         /* Clear checksum offload context. */
3626         txr->last_hw_offload = 0;
3627         txr->last_hw_ipcss = 0;
3628         txr->last_hw_ipcso = 0;
3629         txr->last_hw_tucss = 0;
3630         txr->last_hw_tucso = 0;
3631
3632         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3633             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3634         EM_TX_UNLOCK(txr);
3635 }
3636
3637 /*********************************************************************
3638  *
3639  *  Initialize all transmit rings.
3640  *
3641  **********************************************************************/
3642 static void
3643 em_setup_transmit_structures(struct adapter *adapter)
3644 {
3645         struct tx_ring *txr = adapter->tx_rings;
3646
3647         for (int i = 0; i < adapter->num_queues; i++, txr++)
3648                 em_setup_transmit_ring(txr);
3649
3650         return;
3651 }
3652
3653 /*********************************************************************
3654  *
3655  *  Enable transmit unit.
3656  *
3657  **********************************************************************/
3658 static void
3659 em_initialize_transmit_unit(struct adapter *adapter)
3660 {
3661         struct tx_ring  *txr = adapter->tx_rings;
3662         struct e1000_hw *hw = &adapter->hw;
3663         u32     tctl, txdctl = 0, tarc, tipg = 0;
3664
3665         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3666
3667         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3668                 u64 bus_addr = txr->txdma.dma_paddr;
3669                 /* Base and Len of TX Ring */
3670                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3671                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3672                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3673                     (u32)(bus_addr >> 32));
3674                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3675                     (u32)bus_addr);
3676                 /* Init the HEAD/TAIL indices */
3677                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3678                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3679
3680                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3681                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3682                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3683
3684                 txr->busy = EM_TX_IDLE;
3685                 txdctl = 0; /* clear txdctl */
3686                 txdctl |= 0x1f; /* PTHRESH */
3687                 txdctl |= 1 << 8; /* HTHRESH */
3688                 txdctl |= 1 << 16;/* WTHRESH */
3689                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3690                 txdctl |= E1000_TXDCTL_GRAN;
3691                 txdctl |= 1 << 25; /* LWTHRESH */
3692
3693                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3694         }
3695
3696         /* Set the default values for the Tx Inter Packet Gap timer */
3697         switch (adapter->hw.mac.type) {
3698         case e1000_80003es2lan:
3699                 tipg = DEFAULT_82543_TIPG_IPGR1;
3700                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3701                     E1000_TIPG_IPGR2_SHIFT;
3702                 break;
3703         default:
3704                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3705                     (adapter->hw.phy.media_type ==
3706                     e1000_media_type_internal_serdes))
3707                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3708                 else
3709                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3710                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3711                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3712         }
3713
3714         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3715         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3716
3717         if (adapter->hw.mac.type >= e1000_82540)
3718                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3719                     adapter->tx_abs_int_delay.value);
3720
3721         if ((adapter->hw.mac.type == e1000_82571) ||
3722             (adapter->hw.mac.type == e1000_82572)) {
3723                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3724                 tarc |= TARC_SPEED_MODE_BIT;
3725                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3726         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3727                 /* errata: program both queues to unweighted RR */
3728                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3729                 tarc |= 1;
3730                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3731                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3732                 tarc |= 1;
3733                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3734         } else if (adapter->hw.mac.type == e1000_82574) {
3735                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3736                 tarc |= TARC_ERRATA_BIT;
3737                 if (adapter->num_queues > 1) {
3738                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3739                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3740                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3741                 } else
3742                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3743         }
3744
3745         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3746         if (adapter->tx_int_delay.value > 0)
3747                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3748
3749         /* Program the Transmit Control Register */
3750         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3751         tctl &= ~E1000_TCTL_CT;
3752         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3753                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3754
3755         if (adapter->hw.mac.type >= e1000_82571)
3756                 tctl |= E1000_TCTL_MULR;
3757
3758         /* This write will effectively turn on the transmit unit. */
3759         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3760
3761         if (hw->mac.type == e1000_pch_spt) {
3762                 u32 reg;
3763                 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3764                 reg |= E1000_RCTL_RDMTS_HEX;
3765                 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3766                 reg = E1000_READ_REG(hw, E1000_TARC(0));
3767                 reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3768                 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3769         }
3770 }
3771
3772
3773 /*********************************************************************
3774  *
3775  *  Free all transmit rings.
3776  *
3777  **********************************************************************/
3778 static void
3779 em_free_transmit_structures(struct adapter *adapter)
3780 {
3781         struct tx_ring *txr = adapter->tx_rings;
3782
3783         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3784                 EM_TX_LOCK(txr);
3785                 em_free_transmit_buffers(txr);
3786                 em_dma_free(adapter, &txr->txdma);
3787                 EM_TX_UNLOCK(txr);
3788                 EM_TX_LOCK_DESTROY(txr);
3789         }
3790
3791         free(adapter->tx_rings, M_DEVBUF);
3792 }
3793
3794 /*********************************************************************
3795  *
3796  *  Free transmit ring related data structures.
3797  *
3798  **********************************************************************/
3799 static void
3800 em_free_transmit_buffers(struct tx_ring *txr)
3801 {
3802         struct adapter          *adapter = txr->adapter;
3803         struct em_txbuffer      *txbuf;
3804
3805         INIT_DEBUGOUT("free_transmit_ring: begin");
3806
3807         if (txr->tx_buffers == NULL)
3808                 return;
3809
3810         for (int i = 0; i < adapter->num_tx_desc; i++) {
3811                 txbuf = &txr->tx_buffers[i];
3812                 if (txbuf->m_head != NULL) {
3813                         bus_dmamap_sync(txr->txtag, txbuf->map,
3814                             BUS_DMASYNC_POSTWRITE);
3815                         bus_dmamap_unload(txr->txtag,
3816                             txbuf->map);
3817                         m_freem(txbuf->m_head);
3818                         txbuf->m_head = NULL;
3819                         if (txbuf->map != NULL) {
3820                                 bus_dmamap_destroy(txr->txtag,
3821                                     txbuf->map);
3822                                 txbuf->map = NULL;
3823                         }
3824                 } else if (txbuf->map != NULL) {
3825                         bus_dmamap_unload(txr->txtag,
3826                             txbuf->map);
3827                         bus_dmamap_destroy(txr->txtag,
3828                             txbuf->map);
3829                         txbuf->map = NULL;
3830                 }
3831         }
3832 #if __FreeBSD_version >= 800000
3833         if (txr->br != NULL)
3834                 buf_ring_free(txr->br, M_DEVBUF);
3835 #endif
3836         if (txr->tx_buffers != NULL) {
3837                 free(txr->tx_buffers, M_DEVBUF);
3838                 txr->tx_buffers = NULL;
3839         }
3840         if (txr->txtag != NULL) {
3841                 bus_dma_tag_destroy(txr->txtag);
3842                 txr->txtag = NULL;
3843         }
3844         return;
3845 }
3846
3847
3848 /*********************************************************************
3849  *  The offload context is protocol specific (TCP/UDP) and thus
3850  *  only needs to be set when the protocol changes. A context
3851  *  change can be a performance detriment, however, and might be
3852  *  better avoided entirely. The reason lies in the way the
3853  *  controller supports pipelined requests from the Tx data DMA:
3854  *  up to four requests can be pipelined, and they may belong to
3855  *  the same packet or to multiple packets. However, all requests
3856  *  for one packet are issued before any request is issued for a
3857  *  subsequent packet, and if a request for the next packet
3858  *  requires a context change, that request is stalled until the
3859  *  previous request completes. Setting up a new context thus
3860  *  effectively disables pipelined Tx data DMA, which in turn
3861  *  greatly slows down performance when sending small frames.
3862  *
3863  **********************************************************************/
3864 static void
3865 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3866     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3867 {
3868         struct adapter                  *adapter = txr->adapter;
3869         struct e1000_context_desc       *TXD = NULL;
3870         struct em_txbuffer              *tx_buffer;
3871         int                             cur, hdr_len;
3872         u32                             cmd = 0;
3873         u16                             offload = 0;
3874         u8                              ipcso, ipcss, tucso, tucss;
3875
3876         ipcss = ipcso = tucss = tucso = 0;
3877         hdr_len = ip_off + (ip->ip_hl << 2);
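             /*
              * e.g. for a plain Ethernet + IPv4 frame: ip_off is 14 and
              * ip_hl is 5 (a 20-byte header), so hdr_len = 14 + 20 = 34.
              */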
3878         cur = txr->next_avail_desc;
3879
3880         /* Setup of IP header checksum. */
3881         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3882                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3883                 offload |= CSUM_IP;
3884                 ipcss = ip_off;
3885                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3886                 /*
3887                  * Start offset for header checksum calculation.
3888                  * End offset for header checksum calculation.
3889                  * Offset of place to put the checksum.
3890                  */
3891                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3892                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3893                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3894                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3895                 cmd |= E1000_TXD_CMD_IP;
3896         }
3897
3898         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3899                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3900                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3901                 offload |= CSUM_TCP;
3902                 tucss = hdr_len;
3903                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3904                 /*
3905                  * The 82574L can only remember the *last* context used
3906                  * regardless of the queue it was used for.  We cannot reuse
3907                  * contexts on this hardware platform and must generate a new
3908                  * context every time.  82574L hardware spec, section 7.2.6,
3909                  * second note.
3910                  */
3911                 if (adapter->num_queues < 2) {
3912                         /*
3913                         * Setting up a new checksum offload context for
3914                         * every frame takes a lot of processing time in
3915                         * hardware. It also reduces performance a lot for
3916                         * small frames, so avoid it if the driver can reuse
3917                         * a previously configured checksum offload context.
3918                         */
3919                         if (txr->last_hw_offload == offload) {
3920                                 if (offload & CSUM_IP) {
3921                                         if (txr->last_hw_ipcss == ipcss &&
3922                                         txr->last_hw_ipcso == ipcso &&
3923                                         txr->last_hw_tucss == tucss &&
3924                                         txr->last_hw_tucso == tucso)
3925                                                 return;
3926                                 } else {
3927                                         if (txr->last_hw_tucss == tucss &&
3928                                         txr->last_hw_tucso == tucso)
3929                                                 return;
3930                                 }
3931                         }
3932                         txr->last_hw_offload = offload;
3933                         txr->last_hw_tucss = tucss;
3934                         txr->last_hw_tucso = tucso;
3935                 }
3936                 /*
3937                  * Start offset for payload checksum calculation.
3938                  * End offset for payload checksum calculation.
3939                  * Offset of place to put the checksum.
3940                  */
3941                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3942                 TXD->upper_setup.tcp_fields.tucss = tucss;
3943                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3944                 TXD->upper_setup.tcp_fields.tucso = tucso;
3945                 cmd |= E1000_TXD_CMD_TCP;
3946         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3947                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3948                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3949                 tucss = hdr_len;
3950                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3951                 /*
3952                  * The 82574L can only remember the *last* context used
3953                  * regardless of the queue it was used for.  We cannot reuse
3954                  * contexts on this hardware platform and must generate a new
3955                  * context every time.  82574L hardware spec, section 7.2.6,
3956                  * second note.
3957                  */
3958                 if (adapter->num_queues < 2) {
3959                         /*
3960                         * Setting up a new checksum offload context for
3961                         * every frame takes a lot of processing time in
3962                         * hardware. It also reduces performance a lot for
3963                         * small frames, so avoid it if the driver can reuse
3964                         * a previously configured checksum offload context.
3965                         */
3966                         if (txr->last_hw_offload == offload) {
3967                                 if (offload & CSUM_IP) {
3968                                         if (txr->last_hw_ipcss == ipcss &&
3969                                         txr->last_hw_ipcso == ipcso &&
3970                                         txr->last_hw_tucss == tucss &&
3971                                         txr->last_hw_tucso == tucso)
3972                                                 return;
3973                                 } else {
3974                                         if (txr->last_hw_tucss == tucss &&
3975                                         txr->last_hw_tucso == tucso)
3976                                                 return;
3977                                 }
3978                         }
3979                         txr->last_hw_offload = offload;
3980                         txr->last_hw_tucss = tucss;
3981                         txr->last_hw_tucso = tucso;
3982                 }
3983                 /*
3984                  * Start offset for header checksum calculation.
3985                  * End offset for header checksum calculation.
3986                  * Offset of place to put the checksum.
3987                  */
3988                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3989                 TXD->upper_setup.tcp_fields.tucss = tucss;
3990                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3991                 TXD->upper_setup.tcp_fields.tucso = tucso;
3992         }
3993   
3994         if (offload & CSUM_IP) {
3995                 txr->last_hw_ipcss = ipcss;
3996                 txr->last_hw_ipcso = ipcso;
3997         }
3998
3999         TXD->tcp_seg_setup.data = htole32(0);
4000         TXD->cmd_and_length =
4001             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4002         tx_buffer = &txr->tx_buffers[cur];
4003         tx_buffer->m_head = NULL;
4004         tx_buffer->next_eop = -1;
4005
4006         if (++cur == adapter->num_tx_desc)
4007                 cur = 0;
4008
4009         txr->tx_avail--;
4010         txr->next_avail_desc = cur;
4011 }
4012
4013
4014 /**********************************************************************
4015  *
4016  *  Setup work for hardware segmentation offload (TSO)
4017  *
4018  **********************************************************************/
4019 static void
4020 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4021     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4022 {
4023         struct adapter                  *adapter = txr->adapter;
4024         struct e1000_context_desc       *TXD;
4025         struct em_txbuffer              *tx_buffer;
4026         int cur, hdr_len;
4027
4028         /*
4029          * In theory we could reuse the same TSO context if and only
4030          * if the frame is the same type (IP/TCP) and has the same MSS.
4031          * However, checking whether a frame has the same IP/TCP header
4032          * structure is a hard thing to do, so just ignore that and
4033          * always establish a new TSO context.
4034          */
4035         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
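             /*
              * e.g. for a plain Ethernet + IPv4 + TCP frame with no
              * options: hdr_len = 14 + (5 << 2) + (5 << 2) = 54.
              */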
4036         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
4037                       E1000_TXD_DTYP_D |        /* Data descr type */
4038                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
4039
4040         /* IP and/or TCP header checksum calculation and insertion. */
4041         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4042
4043         cur = txr->next_avail_desc;
4044         tx_buffer = &txr->tx_buffers[cur];
4045         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4046
4047         /*
4048          * Start offset for header checksum calculation.
4049          * End offset for header checksum calculation.
4050          * Offset of place to put the checksum.
4051          */
4052         TXD->lower_setup.ip_fields.ipcss = ip_off;
4053         TXD->lower_setup.ip_fields.ipcse =
4054             htole16(ip_off + (ip->ip_hl << 2) - 1);
4055         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4056         /*
4057          * Start offset for payload checksum calculation.
4058          * End offset for payload checksum calculation.
4059          * Offset of place to put the checksum.
4060          */
4061         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4062         TXD->upper_setup.tcp_fields.tucse = 0;
4063         TXD->upper_setup.tcp_fields.tucso =
4064             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4065         /*
4066          * Payload size per packet w/o any headers.
4067          * Length of all headers up to payload.
4068          */
4069         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4070         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4071
4072         TXD->cmd_and_length = htole32(adapter->txd_cmd |
4073                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
4074                                 E1000_TXD_CMD_TSE |     /* TSE context */
4075                                 E1000_TXD_CMD_IP |      /* Do IP csum */
4076                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
4077                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4078
4079         tx_buffer->m_head = NULL;
4080         tx_buffer->next_eop = -1;
4081
4082         if (++cur == adapter->num_tx_desc)
4083                 cur = 0;
4084
4085         txr->tx_avail--;
4086         txr->next_avail_desc = cur;
4087         txr->tx_tso = TRUE;
4088 }
4089
4090
4091 /**********************************************************************
4092  *
4093  *  Examine each tx_buffer in the used queue. If the hardware is done
4094  *  processing the packet then free associated resources. The
4095  *  tx_buffer is put back on the free queue.
4096  *
4097  **********************************************************************/
4098 static void
4099 em_txeof(struct tx_ring *txr)
4100 {
4101         struct adapter  *adapter = txr->adapter;
4102         int first, last, done, processed;
4103         struct em_txbuffer *tx_buffer;
4104         struct e1000_tx_desc   *tx_desc, *eop_desc;
4105         if_t ifp = adapter->ifp;
4106
4107         EM_TX_LOCK_ASSERT(txr);
4108 #ifdef DEV_NETMAP
4109         if (netmap_tx_irq(ifp, txr->me))
4110                 return;
4111 #endif /* DEV_NETMAP */
4112
4113         /* No work, make sure hang detection is disabled */
4114         if (txr->tx_avail == adapter->num_tx_desc) {
4115                 txr->busy = EM_TX_IDLE;
4116                 return;
4117         }
4118
4119         processed = 0;
4120         first = txr->next_to_clean;
4121         tx_desc = &txr->tx_base[first];
4122         tx_buffer = &txr->tx_buffers[first];
4123         last = tx_buffer->next_eop;
4124         eop_desc = &txr->tx_base[last];
4125
4126         /*
4127          * What this does is get the index of the
4128          * first descriptor AFTER the EOP of the
4129          * first packet, so that we can do the
4130          * simple comparison in the inner while loop.
4131          */
4132         if (++last == adapter->num_tx_desc)
4133                 last = 0;
4134         done = last;
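             /*
              * Example: with a 1024-descriptor ring and the EOP at index
              * 1023, 'done' wraps to 0, so the 'first != done' test below
              * still works across the ring boundary.
              */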
4135
4136         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4137             BUS_DMASYNC_POSTREAD);
4138
4139         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4140                 /* We clean the range of the packet */
4141                 while (first != done) {
4142                         tx_desc->upper.data = 0;
4143                         tx_desc->lower.data = 0;
4144                         tx_desc->buffer_addr = 0;
4145                         ++txr->tx_avail;
4146                         ++processed;
4147
4148                         if (tx_buffer->m_head) {
4149                                 bus_dmamap_sync(txr->txtag,
4150                                     tx_buffer->map,
4151                                     BUS_DMASYNC_POSTWRITE);
4152                                 bus_dmamap_unload(txr->txtag,
4153                                     tx_buffer->map);
4154                                 m_freem(tx_buffer->m_head);
4155                                 tx_buffer->m_head = NULL;
4156                         }
4157                         tx_buffer->next_eop = -1;
4158
4159                         if (++first == adapter->num_tx_desc)
4160                                 first = 0;
4161
4162                         tx_buffer = &txr->tx_buffers[first];
4163                         tx_desc = &txr->tx_base[first];
4164                 }
4165                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4166                 /* See if we can continue to the next packet */
4167                 last = tx_buffer->next_eop;
4168                 if (last != -1) {
4169                         eop_desc = &txr->tx_base[last];
4170                         /* Get new done point */
4171                         if (++last == adapter->num_tx_desc) last = 0;
4172                         done = last;
4173                 } else
4174                         break;
4175         }
4176         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4177             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4178
4179         txr->next_to_clean = first;
4180
4181         /*
4182         ** Hang detection: we know there's work outstanding
4183         ** or the early return at entry would have been taken, so no
4184         ** descriptor processed here indicates a potential hang.
4185         ** The local timer will examine this and do a reset if needed.
4186         */
4187         if (processed == 0) {
4188                 if (txr->busy != EM_TX_HUNG)
4189                         ++txr->busy;
4190         } else /* At least one descriptor was cleaned */
4191                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4192
4193         /*
4194          * If we have a minimum free, clear IFF_DRV_OACTIVE
4195          * to tell the stack that it is OK to send packets.
4196          * Notice that all writes of OACTIVE happen under the
4197          * TX lock which, with a single queue, guarantees 
4198          * sanity.
4199          */
4200         if (txr->tx_avail >= EM_MAX_SCATTER) {
4201                 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4202         }
4203
4204         /* Disable hang detection if all clean */
4205         if (txr->tx_avail == adapter->num_tx_desc)
4206                 txr->busy = EM_TX_IDLE;
4207 }
4208
4209 /*********************************************************************
4210  *
4211  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4212  *
4213  **********************************************************************/
4214 static void
4215 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4216 {
4217         struct adapter          *adapter = rxr->adapter;
4218         struct mbuf             *m;
4219         bus_dma_segment_t       segs;
4220         struct em_rxbuffer      *rxbuf;
4221         int                     i, j, error, nsegs;
4222         bool                    cleaned = FALSE;
4223
4224         i = j = rxr->next_to_refresh;
4225         /*
4226         ** Get one descriptor beyond
4227         ** our work mark to control
4228         ** the loop.
4229         */
4230         if (++j == adapter->num_rx_desc)
4231                 j = 0;
4232
4233         while (j != limit) {
4234                 rxbuf = &rxr->rx_buffers[i];
4235                 if (rxbuf->m_head == NULL) {
4236                         m = m_getjcl(M_NOWAIT, MT_DATA,
4237                             M_PKTHDR, adapter->rx_mbuf_sz);
4238                         /*
4239                         ** If we have a temporary resource shortage
4240                         ** that causes a failure, just abort the refresh
4241                         ** for now; we will return to this point when
4242                         ** reinvoked from em_rxeof.
4243                         */
4244                         if (m == NULL)
4245                                 goto update;
4246                 } else
4247                         m = rxbuf->m_head;
4248
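                     /*
                      * Reset the mbuf to a pristine state: a recycled
                      * buffer may have had its length or data pointer
                      * adjusted, so restore both before reloading the map.
                      */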
4249                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4250                 m->m_flags |= M_PKTHDR;
4251                 m->m_data = m->m_ext.ext_buf;
4252
4253                 /* Use bus_dma machinery to setup the memory mapping  */
4254                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4255                     m, &segs, &nsegs, BUS_DMA_NOWAIT);
4256                 if (error != 0) {
4257                         printf("Refresh mbufs: mbuf dmamap load"
4258                             " failure - %d\n", error);
4259                         m_free(m);
4260                         rxbuf->m_head = NULL;
4261                         goto update;
4262                 }
4263                 rxbuf->m_head = m;
4264                 rxbuf->paddr = segs.ds_addr;
4265                 bus_dmamap_sync(rxr->rxtag,
4266                     rxbuf->map, BUS_DMASYNC_PREREAD);
4267                 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4268                 cleaned = TRUE;
4269
4270                 i = j; /* Next is precalculated for us */
4271                 rxr->next_to_refresh = i;
4272                 /* Calculate next controlling index */
4273                 if (++j == adapter->num_rx_desc)
4274                         j = 0;
4275         }
4276 update:
4277         /*
4278         ** Update the tail pointer only if,
4279         ** and only as far as, we have refreshed.
4280         */
4281         if (cleaned)
4282                 E1000_WRITE_REG(&adapter->hw,
4283                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4284
4285         return;
4286 }
4287
4288
4289 /*********************************************************************
4290  *
4291  *  Allocate memory for rx_buffer structures. Since we use one
4292  *  rx_buffer per received packet, the maximum number of rx_buffer's
4293  *  that we'll need is equal to the number of receive descriptors
4294  *  that we've allocated.
4295  *
4296  **********************************************************************/
4297 static int
4298 em_allocate_receive_buffers(struct rx_ring *rxr)
4299 {
4300         struct adapter          *adapter = rxr->adapter;
4301         device_t                dev = adapter->dev;
4302         struct em_rxbuffer      *rxbuf;
4303         int                     error;
4304
4305         rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4306             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4307         if (rxr->rx_buffers == NULL) {
4308                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4309                 return (ENOMEM);
4310         }
4311
4312         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4313                                 1, 0,                   /* alignment, bounds */
4314                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4315                                 BUS_SPACE_MAXADDR,      /* highaddr */
4316                                 NULL, NULL,             /* filter, filterarg */
4317                                 MJUM9BYTES,             /* maxsize */
4318                                 1,                      /* nsegments */
4319                                 MJUM9BYTES,             /* maxsegsize */
4320                                 0,                      /* flags */
4321                                 NULL,                   /* lockfunc */
4322                                 NULL,                   /* lockarg */
4323                                 &rxr->rxtag);
4324         if (error) {
4325                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4326                     __func__, error);
4327                 goto fail;
4328         }
4329
4330         rxbuf = rxr->rx_buffers;
4331         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4333                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4334                 if (error) {
4335                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4336                             __func__, error);
4337                         goto fail;
4338                 }
4339         }
4340
4341         return (0);
4342
4343 fail:
4344         em_free_receive_structures(adapter);
4345         return (error);
4346 }
4347
4348
4349 /*********************************************************************
4350  *
4351  *  Initialize a receive ring and its buffers.
4352  *
4353  **********************************************************************/
4354 static int
4355 em_setup_receive_ring(struct rx_ring *rxr)
4356 {
4357         struct  adapter         *adapter = rxr->adapter;
4358         struct em_rxbuffer      *rxbuf;
4359         bus_dma_segment_t       seg[1];
4360         int                     rsize, nsegs, error = 0;
4361 #ifdef DEV_NETMAP
4362         struct netmap_slot *slot;
4363         struct netmap_adapter *na = netmap_getna(adapter->ifp);
4364 #endif
4365
4366
4367         /* Clear the ring contents */
4368         EM_RX_LOCK(rxr);
4369         rsize = roundup2(adapter->num_rx_desc *
4370             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
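             /*
              * Example, assuming a 16-byte extended descriptor and an
              * EM_DBA_ALIGN of 128: 1024 descriptors occupy 16384 bytes,
              * which roundup2() leaves unchanged as it is already aligned.
              */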
4371         bzero((void *)rxr->rx_base, rsize);
4372 #ifdef DEV_NETMAP
4373         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4374 #endif
4375
4376         /*
4377         ** Free current RX buffer structs and their mbufs
4378         */
4379         for (int i = 0; i < adapter->num_rx_desc; i++) {
4380                 rxbuf = &rxr->rx_buffers[i];
4381                 if (rxbuf->m_head != NULL) {
4382                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4383                             BUS_DMASYNC_POSTREAD);
4384                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4385                         m_freem(rxbuf->m_head);
4386                         rxbuf->m_head = NULL; /* mark as freed */
4387                 }
4388         }
4389
4390         /* Now replenish the mbufs */
4391         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4392                 rxbuf = &rxr->rx_buffers[j];
4393 #ifdef DEV_NETMAP
4394                 if (slot) {
4395                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4396                         uint64_t paddr;
4397                         void *addr;
4398
4399                         addr = PNMB(na, slot + si, &paddr);
4400                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4401                         rxbuf->paddr = paddr;
4402                         em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4403                         continue;
4404                 }
4405 #endif /* DEV_NETMAP */
4406                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4407                     M_PKTHDR, adapter->rx_mbuf_sz);
4408                 if (rxbuf->m_head == NULL) {
4409                         error = ENOBUFS;
4410                         goto fail;
4411                 }
4412                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4413                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4414                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4415
4416                 /* Get the memory mapping */
4417                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4418                     rxbuf->map, rxbuf->m_head, seg,
4419                     &nsegs, BUS_DMA_NOWAIT);
4420                 if (error != 0) {
4421                         m_freem(rxbuf->m_head);
4422                         rxbuf->m_head = NULL;
4423                         goto fail;
4424                 }
4425                 bus_dmamap_sync(rxr->rxtag,
4426                     rxbuf->map, BUS_DMASYNC_PREREAD);
4427
4428                 rxbuf->paddr = seg[0].ds_addr;
4429                 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4430         }
4431         rxr->next_to_check = 0;
4432         rxr->next_to_refresh = 0;
4433         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4434             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4435
4436 fail:
4437         EM_RX_UNLOCK(rxr);
4438         return (error);
4439 }
4440
4441 /*********************************************************************
4442  *
4443  *  Initialize all receive rings.
4444  *
4445  **********************************************************************/
4446 static int
4447 em_setup_receive_structures(struct adapter *adapter)
4448 {
4449         struct rx_ring *rxr = adapter->rx_rings;
4450         int q;
4451
4452         for (q = 0; q < adapter->num_queues; q++, rxr++)
4453                 if (em_setup_receive_ring(rxr))
4454                         goto fail;
4455
4456         return (0);
4457 fail:
4458         /*
4459          * Free the RX buffers allocated so far; we only handle
4460          * the rings that completed, since the failing case has
4461          * cleaned up after itself. 'q' failed, so it is the terminus.
4462          */
4463         for (int i = 0; i < q; ++i) {
4464                 rxr = &adapter->rx_rings[i];
4465                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4466                         struct em_rxbuffer *rxbuf;
4467                         rxbuf = &rxr->rx_buffers[n];
4468                         if (rxbuf->m_head != NULL) {
4469                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4470                                   BUS_DMASYNC_POSTREAD);
4471                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4472                                 m_freem(rxbuf->m_head);
4473                                 rxbuf->m_head = NULL;
4474                         }
4475                 }
4476                 rxr->next_to_check = 0;
4477                 rxr->next_to_refresh = 0;
4478         }
4479
4480         return (ENOBUFS);
4481 }
4482
4483 /*********************************************************************
4484  *
4485  *  Free all receive rings.
4486  *
4487  **********************************************************************/
4488 static void
4489 em_free_receive_structures(struct adapter *adapter)
4490 {
4491         struct rx_ring *rxr = adapter->rx_rings;
4492
4493         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4494                 em_free_receive_buffers(rxr);
4495                 /* Free the ring memory as well */
4496                 em_dma_free(adapter, &rxr->rxdma);
4497                 EM_RX_LOCK_DESTROY(rxr);
4498         }
4499
4500         free(adapter->rx_rings, M_DEVBUF);
4501 }
4502
4503
4504 /*********************************************************************
4505  *
4506  *  Free receive ring data structures
4507  *
4508  **********************************************************************/
4509 static void
4510 em_free_receive_buffers(struct rx_ring *rxr)
4511 {
4512         struct adapter          *adapter = rxr->adapter;
4513         struct em_rxbuffer      *rxbuf = NULL;
4514
4515         INIT_DEBUGOUT("free_receive_buffers: begin");
4516
4517         if (rxr->rx_buffers != NULL) {
4518                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4519                         rxbuf = &rxr->rx_buffers[i];
4520                         if (rxbuf->map != NULL) {
4521                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4522                                     BUS_DMASYNC_POSTREAD);
4523                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4524                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4525                         }
4526                         if (rxbuf->m_head != NULL) {
4527                                 m_freem(rxbuf->m_head);
4528                                 rxbuf->m_head = NULL;
4529                         }
4530                 }
4531                 free(rxr->rx_buffers, M_DEVBUF);
4532                 rxr->rx_buffers = NULL;
4533                 rxr->next_to_check = 0;
4534                 rxr->next_to_refresh = 0;
4535         }
4536
4537         if (rxr->rxtag != NULL) {
4538                 bus_dma_tag_destroy(rxr->rxtag);
4539                 rxr->rxtag = NULL;
4540         }
4541
4542         return;
4543 }
4544
4545
4546 /*********************************************************************
4547  *
4548  *  Enable receive unit.
4549  *
4550  **********************************************************************/
4551
4552 static void
4553 em_initialize_receive_unit(struct adapter *adapter)
4554 {
4555         struct rx_ring *rxr = adapter->rx_rings;
4556         if_t ifp = adapter->ifp;
4557         struct e1000_hw *hw = &adapter->hw;
4558         u32     rctl, rxcsum, rfctl;
4559
4560         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4561
4562         /*
4563          * Make sure receives are disabled while setting
4564          * up the descriptor ring
4565          */
4566         rctl = E1000_READ_REG(hw, E1000_RCTL);
4567         /* Never disable receives on 82574/82583 once they have been enabled */
4568         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4569                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4570
4571         /* Setup the Receive Control Register */
4572         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4573         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4574             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4575             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4576
4577         /* Do not store bad packets */
4578         rctl &= ~E1000_RCTL_SBP;
4579
4580         /* Enable Long Packet receive */
4581         if (if_getmtu(ifp) > ETHERMTU)
4582                 rctl |= E1000_RCTL_LPE;
4583         else
4584                 rctl &= ~E1000_RCTL_LPE;
4585
4586         /* Strip the CRC */
4587         if (!em_disable_crc_stripping)
4588                 rctl |= E1000_RCTL_SECRC;
4589
4590         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4591             adapter->rx_abs_int_delay.value);
4592
4593         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4594             adapter->rx_int_delay.value);
4595         /*
4596          * Set the interrupt throttling rate. Value is calculated
4597          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4598          */
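             /*
              * Example, assuming MAX_INTS_PER_SEC is 8000 as this driver
              * has historically used: 1 / (8000 * 256ns) is ~488, i.e. the
              * ITR is programmed in 256ns units for ~8000 interrupts/sec.
              */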
4599         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4600
4601         /* Use extended rx descriptor formats */
4602         rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4603         rfctl |= E1000_RFCTL_EXTEN;
4604         /*
4605         ** When using MSIX interrupts we need to throttle
4606         ** using the EITR register (82574 only)
4607         */
4608         if (hw->mac.type == e1000_82574) {
4609                 for (int i = 0; i < 4; i++)
4610                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4611                             DEFAULT_ITR);
4612                 /* Disable accelerated acknowledge */
4613                 rfctl |= E1000_RFCTL_ACK_DIS;
4614         }
4615         E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4616
4617         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4618         if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4619 #ifdef EM_MULTIQUEUE
4620                 rxcsum |= E1000_RXCSUM_TUOFL |
4621                           E1000_RXCSUM_IPOFL |
4622                           E1000_RXCSUM_PCSD;
4623 #else
4624                 rxcsum |= E1000_RXCSUM_TUOFL;
4625 #endif
4626         } else
4627                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4628
4629         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4630
4631 #ifdef EM_MULTIQUEUE
4632 #define RSSKEYLEN 10
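     /* 10 RSSRK registers x 4 bytes = the 40-byte RSS (Toeplitz) hash key */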
4633         if (adapter->num_queues > 1) {
4634                 uint8_t  rss_key[4 * RSSKEYLEN];
4635                 uint32_t reta = 0;
4636                 int i;
4637
4638                 /*
4639                 * Configure RSS key
4640                 */
4641                 arc4rand(rss_key, sizeof(rss_key), 0);
4642                 for (i = 0; i < RSSKEYLEN; ++i) {
4643                         uint32_t rssrk = 0;
4644
4645                         rssrk = EM_RSSRK_VAL(rss_key, i);
4646                         E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk);
4647                 }
4648
4649                 /*
4650                 * Configure RSS redirect table in following fashion:
4651                 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4652                 */
4653                 for (i = 0; i < sizeof(reta); ++i) {
4654                         uint32_t q;
4655
4656                         q = (i % adapter->num_queues) << 7;
4657                         reta |= q << (8 * i);
4658                 }
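                     /*
                      * Example: with two queues the loop above yields
                      * reta == 0x80008000 (bit 7 of each byte selects the
                      * queue), replicated into all 32 RETA registers below.
                      */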
4659
4660                 for (i = 0; i < 32; ++i) {
4661                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4662                 }
4663
4664                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4665                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4666                                 E1000_MRQC_RSS_FIELD_IPV4 |
4667                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4668                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4669                                 E1000_MRQC_RSS_FIELD_IPV6);
4670         }
4671 #endif
4672         /*
4673         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4674         ** long latencies are observed, like Lenovo X60. This
4675         ** change eliminates the problem, but since having positive
4676         ** values in RDTR is a known source of problems on other
4677         ** platforms another solution is being sought.
4678         */
4679         if (hw->mac.type == e1000_82573)
4680                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4681
4682         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4683                 /* Setup the Base and Length of the Rx Descriptor Ring */
4684                 u64 bus_addr = rxr->rxdma.dma_paddr;
4685                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4686
4687                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4688                     adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4689                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4690                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4691                 /* Setup the Head and Tail Descriptor Pointers */
4692                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4693 #ifdef DEV_NETMAP
4694                 /*
4695                  * an init() while a netmap client is active must
4696                  * preserve the rx buffers passed to userspace.
4697                  */
4698                 if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4699                         struct netmap_adapter *na = netmap_getna(adapter->ifp);
4700                         rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4701                 }
4702 #endif /* DEV_NETMAP */
4703                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4704         }
4705
4706         /*
4707          * Set PTHRESH for improved jumbo performance
4708          * According to 10.2.5.11 of Intel 82574 Datasheet,
4709          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4710          * Only write to RXDCTL(1) if there is a need for different
4711          * settings.
4712          */
4713         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4714             (adapter->hw.mac.type == e1000_pch2lan) ||
4715             (adapter->hw.mac.type == e1000_ich10lan)) &&
4716             (if_getmtu(ifp) > ETHERMTU)) {
4717                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4718                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4719         } else if (adapter->hw.mac.type == e1000_82574) {
4720                 for (int i = 0; i < adapter->num_queues; i++) {
4721                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4722
4723                         rxdctl |= 0x20; /* PTHRESH */
4724                         rxdctl |= 4 << 8; /* HTHRESH */
4725                         rxdctl |= 4 << 16;/* WTHRESH */
4726                         rxdctl |= 1 << 24; /* Switch to granularity */
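                             /* The bits OR'ed in above amount to 0x01040420 */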
4727                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4728                 }
4729         }
4730                 
4731         if (adapter->hw.mac.type >= e1000_pch2lan) {
4732                 if (if_getmtu(ifp) > ETHERMTU)
4733                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4734                 else
4735                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4736         }
4737
4738         /* Make sure VLAN Filters are off */
4739         rctl &= ~E1000_RCTL_VFE;
4740
4741         if (adapter->rx_mbuf_sz == MCLBYTES)
4742                 rctl |= E1000_RCTL_SZ_2048;
4743         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4744                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4745         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4746                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4747
4748         /* Clear the descriptor type (DTYP) bits to select a DTYPE of 00 */
4749         rctl &= ~0x00000C00;
4750         /* Write out the settings */
4751         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4752
4753         return;
4754 }
4755
4756
4757 /*********************************************************************
4758  *
4759  *  This routine executes in interrupt context. It replenishes
4760  *  the mbufs in the descriptor ring and sends data which has been
4761  *  dma'ed into host memory to the upper layer.
4762  *
4763  *  We loop at most count times if count is > 0, or until done if
4764  *  count < 0.
4765  *  
4766  *  For polling we also now return the number of cleaned packets
4767  *********************************************************************/
4768 static bool
4769 em_rxeof(struct rx_ring *rxr, int count, int *done)
4770 {
4771         struct adapter          *adapter = rxr->adapter;
4772         if_t ifp = adapter->ifp;
4773         struct mbuf             *mp, *sendmp;
4774         u32                     status = 0;
4775         u16                     len;
4776         int                     i, processed, rxdone = 0;
4777         bool                    eop;
4778         union e1000_rx_desc_extended    *cur;
4779
4780         EM_RX_LOCK(rxr);
4781
4782         /* Sync the ring */
4783         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4784             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4785
4786
4787 #ifdef DEV_NETMAP
4788         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4789                 EM_RX_UNLOCK(rxr);
4790                 return (FALSE);
4791         }
4792 #endif /* DEV_NETMAP */
4793
4794         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4795                 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4796                         break;
4797
4798                 cur = &rxr->rx_base[i];
4799                 status = le32toh(cur->wb.upper.status_error);
4800                 mp = sendmp = NULL;
4801
4802                 if ((status & E1000_RXD_STAT_DD) == 0)
4803                         break;
4804
4805                 len = le16toh(cur->wb.upper.length);
4806                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4807
4808                 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4809                     (rxr->discard == TRUE)) {
4810                         adapter->dropped_pkts++;
4811                         ++rxr->rx_discarded;
4812                         if (!eop) /* Catch subsequent segs */
4813                                 rxr->discard = TRUE;
4814                         else
4815                                 rxr->discard = FALSE;
4816                         em_rx_discard(rxr, i);
4817                         goto next_desc;
4818                 }
4819                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4820
4821                 /* Assign correct length to the current fragment */
4822                 mp = rxr->rx_buffers[i].m_head;
4823                 mp->m_len = len;
4824
4825                 /* Trigger for refresh */
4826                 rxr->rx_buffers[i].m_head = NULL;
4827
4828                 /* First segment? */
4829                 if (rxr->fmp == NULL) {
4830                         mp->m_pkthdr.len = len;
4831                         rxr->fmp = rxr->lmp = mp;
4832                 } else {
4833                         /* Chain mbuf's together */
4834                         mp->m_flags &= ~M_PKTHDR;
4835                         rxr->lmp->m_next = mp;
4836                         rxr->lmp = mp;
4837                         rxr->fmp->m_pkthdr.len += len;
4838                 }
4839
4840                 if (eop) {
4841                         --count;
4842                         sendmp = rxr->fmp;
4843                         if_setrcvif(sendmp, ifp);
4844                         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4845                         em_receive_checksum(status, sendmp);
4846 #ifndef __NO_STRICT_ALIGNMENT
4847                         if (adapter->hw.mac.max_frame_size >
4848                             (MCLBYTES - ETHER_ALIGN) &&
4849                             em_fixup_rx(rxr) != 0)
4850                                 goto skip;
4851 #endif
4852                         if (status & E1000_RXD_STAT_VP) {
4853                                 if_setvtag(sendmp, 
4854                                     le16toh(cur->wb.upper.vlan));
4855                                 sendmp->m_flags |= M_VLANTAG;
4856                         }
4857 #ifndef __NO_STRICT_ALIGNMENT
4858 skip:
4859 #endif
4860                         rxr->fmp = rxr->lmp = NULL;
4861                 }
4862 next_desc:
4863                 /* Sync the ring */
4864                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4865                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4866
4867                 /* Zero out the receive descriptors status. */
4868                 cur->wb.upper.status_error &= htole32(~0xFF);
4869                 ++rxdone;       /* cumulative for POLL */
4870                 ++processed;
4871
4872                 /* Advance our pointers to the next descriptor. */
4873                 if (++i == adapter->num_rx_desc)
4874                         i = 0;
4875
4876                 /* Send to the stack */
4877                 if (sendmp != NULL) {
4878                         rxr->next_to_check = i;
4879                         EM_RX_UNLOCK(rxr);
4880                         if_input(ifp, sendmp);
4881                         EM_RX_LOCK(rxr);
4882                         i = rxr->next_to_check;
4883                 }
4884
4885                 /* Only refresh mbufs every 8 descriptors */
4886                 if (processed == 8) {
4887                         em_refresh_mbufs(rxr, i);
4888                         processed = 0;
4889                 }
4890         }
4891
4892         /* Catch any remaining refresh work */
4893         if (e1000_rx_unrefreshed(rxr))
4894                 em_refresh_mbufs(rxr, i);
4895
4896         rxr->next_to_check = i;
4897         if (done != NULL)
4898                 *done = rxdone;
4899         EM_RX_UNLOCK(rxr);
4900
4901         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4902 }
4903
4904 static __inline void
4905 em_rx_discard(struct rx_ring *rxr, int i)
4906 {
4907         struct em_rxbuffer      *rbuf;
4908
4909         rbuf = &rxr->rx_buffers[i];
4910         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4911
4912         /* Free any previous pieces */
4913         if (rxr->fmp != NULL) {
4914                 rxr->fmp->m_flags |= M_PKTHDR;
4915                 m_freem(rxr->fmp);
4916                 rxr->fmp = NULL;
4917                 rxr->lmp = NULL;
4918         }
4919         /*
4920         ** Free buffer and allow em_refresh_mbufs()
4921         ** to clean up and recharge buffer.
4922         */
4923         if (rbuf->m_head) {
4924                 m_free(rbuf->m_head);
4925                 rbuf->m_head = NULL;
4926         }
4927         return;
4928 }
4929
4930 #ifndef __NO_STRICT_ALIGNMENT
4931 /*
4932  * When jumbo frames are enabled we should realign the entire payload on
4933  * architectures with strict alignment. This is a serious design mistake
4934  * of the 8254x, as it nullifies the benefit of DMA: the chip only allows
4935  * an RX buffer size of 2048/4096/8192/16384, while what we really want
4936  * is 2048 - ETHER_ALIGN, which would align the payload. On architectures
4937  * without strict alignment restrictions the 8254x still performs
4938  * unaligned memory accesses, which reduce performance as well. To avoid
4939  * copying an entire frame just to realign it, we allocate a new mbuf,
4940  * copy the ethernet header into it, and prepend it to the existing chain.
4941  *
4942  * Be aware that best performance on the 8254x is achieved only when jumbo
4943  * frames are not used at all on architectures with strict alignment.
4944  */
4945 static int
4946 em_fixup_rx(struct rx_ring *rxr)
4947 {
4948         struct adapter *adapter = rxr->adapter;
4949         struct mbuf *m, *n;
4950         int error;
4951
4952         error = 0;
4953         m = rxr->fmp;
4954         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4955                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4956                 m->m_data += ETHER_HDR_LEN;
4957         } else {
4958                 MGETHDR(n, M_NOWAIT, MT_DATA);
4959                 if (n != NULL) {
4960                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4961                         m->m_data += ETHER_HDR_LEN;
4962                         m->m_len -= ETHER_HDR_LEN;
4963                         n->m_len = ETHER_HDR_LEN;
4964                         M_MOVE_PKTHDR(n, m);
4965                         n->m_next = m;
4966                         rxr->fmp = n;
4967                 } else {
4968                         adapter->dropped_pkts++;
4969                         m_freem(rxr->fmp);
4970                         rxr->fmp = NULL;
4971                         error = ENOMEM;
4972                 }
4973         }
4974
4975         return (error);
4976 }
4977 #endif
4978
4979 static void
4980 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4981 {
4982         rxd->read.buffer_addr = htole64(rxbuf->paddr);
4983         /* DD bits must be cleared */
4984         rxd->wb.upper.status_error = 0;
4985 }
4986
4987 /*********************************************************************
4988  *
4989  *  Verify that the hardware indicated that the checksum is valid.
4990  *  Inform the stack about the status of the checksum so that the
4991  *  stack doesn't spend time verifying it.
4992  *
4993  *********************************************************************/
4994 static void
4995 em_receive_checksum(uint32_t status, struct mbuf *mp)
4996 {
4997         mp->m_pkthdr.csum_flags = 0;
4998
4999         /* If the Ignore Checksum (IXSM) bit is set, do nothing */
5000         if (status & E1000_RXD_STAT_IXSM)
5001                 return;
5002
5003         /* If the IP checksum exists and there is no IP Checksum error */
5004         if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5005                 E1000_RXD_STAT_IPCS) {
5006                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5007         }
5008
5009         /* TCP or UDP checksum */
5010         if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5011             E1000_RXD_STAT_TCPCS) {
5012                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5013                 mp->m_pkthdr.csum_data = htons(0xffff);
5014         }
5015         if (status & E1000_RXD_STAT_UDPCS) {
5016                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5017                 mp->m_pkthdr.csum_data = htons(0xffff);
5018         }
5019 }
5020
5021 /*
5022  * This routine is run via a vlan
5023  * config EVENT.
5024  */
5025 static void
5026 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5027 {
5028         struct adapter  *adapter = if_getsoftc(ifp);
5029         u32             index, bit;
5030
5031         if ((void *)adapter != arg)   /* Not our event */
5032                 return;
5033
5034         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5035                 return;
5036
5037         EM_CORE_LOCK(adapter);
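             /*
              * Example: vtag 100 -> index 3 (100 >> 5) and bit 4
              * (100 & 0x1F), i.e. bit 4 of shadow_vfta[3] tracks VLAN 100.
              */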
5038         index = (vtag >> 5) & 0x7F;
5039         bit = vtag & 0x1F;
5040         adapter->shadow_vfta[index] |= (1 << bit);
5041         ++adapter->num_vlans;
5042         /* Re-init to load the changes */
5043         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5044                 em_init_locked(adapter);
5045         EM_CORE_UNLOCK(adapter);
5046 }
5047
5048 /*
5049  * This routine is run via a vlan
5050  * unconfig EVENT.
5051  */
5052 static void
5053 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5054 {
5055         struct adapter  *adapter = if_getsoftc(ifp);
5056         u32             index, bit;
5057
5058         if (adapter != arg)
5059                 return;
5060
5061         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5062                 return;
5063
5064         EM_CORE_LOCK(adapter);
5065         index = (vtag >> 5) & 0x7F;
5066         bit = vtag & 0x1F;
5067         adapter->shadow_vfta[index] &= ~(1 << bit);
5068         --adapter->num_vlans;
5069         /* Re-init to load the changes */
5070         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5071                 em_init_locked(adapter);
5072         EM_CORE_UNLOCK(adapter);
5073 }
5074
5075 static void
5076 em_setup_vlan_hw_support(struct adapter *adapter)
5077 {
5078         struct e1000_hw *hw = &adapter->hw;
5079         u32             reg;
5080
5081         /*
5082         ** We get here through init_locked, meaning
5083         ** a soft reset; this has already cleared
5084         ** the VFTA and other state, so if no vlans
5085         ** have been registered, do nothing.
5086         */
5087         if (adapter->num_vlans == 0)
5088                 return;
5089
5090         /*
5091         ** A soft reset zeroes out the VFTA, so
5092         ** we need to repopulate it now.
5093         */
5094         for (int i = 0; i < EM_VFTA_SIZE; i++)
5095                 if (adapter->shadow_vfta[i] != 0)
5096                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5097                             i, adapter->shadow_vfta[i]);
5098
5099         reg = E1000_READ_REG(hw, E1000_CTRL);
5100         reg |= E1000_CTRL_VME;
5101         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5102
5103         /* Enable the Filter Table */
5104         reg = E1000_READ_REG(hw, E1000_RCTL);
5105         reg &= ~E1000_RCTL_CFIEN;
5106         reg |= E1000_RCTL_VFE;
5107         E1000_WRITE_REG(hw, E1000_RCTL, reg);
5108 }
5109
5110 static void
5111 em_enable_intr(struct adapter *adapter)
5112 {
5113         struct e1000_hw *hw = &adapter->hw;
5114         u32 ims_mask = IMS_ENABLE_MASK;
5115
5116         if (hw->mac.type == e1000_82574) {
5117                 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5118                 ims_mask |= adapter->ims;
5119         } 
5120         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5121 }
5122
5123 static void
5124 em_disable_intr(struct adapter *adapter)
5125 {
5126         struct e1000_hw *hw = &adapter->hw;
5127
5128         if (hw->mac.type == e1000_82574)
5129                 E1000_WRITE_REG(hw, EM_EIAC, 0);
5130         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5131 }
5132
5133 /*
5134  * Bit of a misnomer: what this really means is
5135  * to enable OS management of the system, i.e.
5136  * to disable the special hardware management features.
5137  */
5138 static void
5139 em_init_manageability(struct adapter *adapter)
5140 {
5141         /* A shared code workaround */
5142 #define E1000_82542_MANC2H E1000_MANC2H
5143         if (adapter->has_manage) {
5144                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5145                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5146
5147                 /* disable hardware interception of ARP */
5148                 manc &= ~(E1000_MANC_ARP_EN);
5149
5150                 /* enable receiving management packets to the host */
5151                 manc |= E1000_MANC_EN_MNG2HOST;
5152 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5153 #define E1000_MNG2HOST_PORT_664 (1 << 6)
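     /* 623 and 664 are the standard and secure RMCP ports used by ASF/IPMI */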
5154                 manc2h |= E1000_MNG2HOST_PORT_623;
5155                 manc2h |= E1000_MNG2HOST_PORT_664;
5156                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5157                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5158         }
5159 }
5160
5161 /*
5162  * Give control back to hardware management
5163  * controller if there is one.
5164  */
5165 static void
5166 em_release_manageability(struct adapter *adapter)
5167 {
5168         if (adapter->has_manage) {
5169                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5170
5171                 /* re-enable hardware interception of ARP */
5172                 manc |= E1000_MANC_ARP_EN;
5173                 manc &= ~E1000_MANC_EN_MNG2HOST;
5174
5175                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5176         }
5177 }
5178
5179 /*
5180  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5181  * For ASF and Pass Through versions of f/w this means
5182  * that the driver is loaded. For AMT versions of the f/w
5183  * this means that the network i/f is open.
5184  */
5185 static void
5186 em_get_hw_control(struct adapter *adapter)
5187 {
5188         u32 ctrl_ext, swsm;
5189
5190         if (adapter->hw.mac.type == e1000_82573) {
5191                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5192                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5193                     swsm | E1000_SWSM_DRV_LOAD);
5194                 return;
5195         }
5196         /* else */
5197         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5198         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5199             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5200         return;
5201 }
5202
5203 /*
5204  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5205  * For ASF and Pass Through versions of f/w this means that
5206  * the driver is no longer loaded. For AMT versions of the
5207  * f/w this means that the network i/f is closed.
5208  */
5209 static void
5210 em_release_hw_control(struct adapter *adapter)
5211 {
5212         u32 ctrl_ext, swsm;
5213
5214         if (!adapter->has_manage)
5215                 return;
5216
5217         if (adapter->hw.mac.type == e1000_82573) {
5218                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5219                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5220                     swsm & ~E1000_SWSM_DRV_LOAD);
5221                 return;
5222         }
5223         /* else */
5224         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5225         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5226             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5227         return;
5228 }
5229
5230 static int
5231 em_is_valid_ether_addr(u8 *addr)
5232 {
5233         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5234
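             /*
              * Reject addresses with the multicast/group bit (the LSB of
              * the first octet) set, and the all-zero address.
              */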
5235         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5236                 return (FALSE);
5237         }
5238
5239         return (TRUE);
5240 }
5241
5242 /*
5243 ** Parse the interface capabilities with regard
5244 ** to both system management and wake-on-lan for
5245 ** later use.
5246 */
5247 static void
5248 em_get_wakeup(device_t dev)
5249 {
5250         struct adapter  *adapter = device_get_softc(dev);
5251         u16             eeprom_data = 0, device_id, apme_mask;
5252
5253         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5254         apme_mask = EM_EEPROM_APME;
5255
5256         switch (adapter->hw.mac.type) {
5257         case e1000_82573:
5258         case e1000_82583:
5259                 adapter->has_amt = TRUE;
5260                 /* Falls thru */
5261         case e1000_82571:
5262         case e1000_82572:
5263         case e1000_80003es2lan:
5264                 if (adapter->hw.bus.func == 1) {
5265                         e1000_read_nvm(&adapter->hw,
5266                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5267                         break;
5268                 } else
5269                         e1000_read_nvm(&adapter->hw,
5270                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5271                 break;
5272         case e1000_ich8lan:
5273         case e1000_ich9lan:
5274         case e1000_ich10lan:
5275         case e1000_pchlan:
5276         case e1000_pch2lan:
5277                 apme_mask = E1000_WUC_APME;
5278                 adapter->has_amt = TRUE;
5279                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5280                 break;
5281         default:
5282                 e1000_read_nvm(&adapter->hw,
5283                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5284                 break;
5285         }
5286         if (eeprom_data & apme_mask)
5287                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5288         /*
5289          * We have the eeprom settings; now apply the special cases
5290          * where the eeprom may be wrong or the board won't support
5291          * wake on lan on a particular port.
5292          */
5293         device_id = pci_get_device(dev);
5294         switch (device_id) {
5295         case E1000_DEV_ID_82571EB_FIBER:
5296                 /* Wake events only supported on port A for dual fiber
5297                  * regardless of eeprom setting */
5298                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5299                     E1000_STATUS_FUNC_1)
5300                         adapter->wol = 0;
5301                 break;
5302         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5303         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5304         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5305                 /* if quad port adapter, disable WoL on all but port A */
5306                 if (global_quad_port_a != 0)
5307                         adapter->wol = 0;
5308                 /* Reset for multiple quad port adapters */
5309                 if (++global_quad_port_a == 4)
5310                         global_quad_port_a = 0;
5311                 break;
5312         }
5313         return;
5314 }
5315
5316
5317 /*
5318  * Enable PCI Wake On Lan capability
5319  */
5320 static void
5321 em_enable_wakeup(device_t dev)
5322 {
5323         struct adapter  *adapter = device_get_softc(dev);
5324         if_t ifp = adapter->ifp;
5325         u32             pmc, ctrl, ctrl_ext, rctl;
5326         u16             status;
5327
5328         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5329                 return;
5330
5331         /* Advertise the wakeup capability */
5332         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5333         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5334         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5335         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5336
5337         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5338             (adapter->hw.mac.type == e1000_pchlan) ||
5339             (adapter->hw.mac.type == e1000_ich9lan) ||
5340             (adapter->hw.mac.type == e1000_ich10lan))
5341                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5342
5343         /* Keep the laser running on Fiber adapters */
5344         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5345             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5346                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5347                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5348                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5349         }
5350
5351         /*
5352         ** Determine type of Wakeup: note that wol
5353         ** is set with all bits on by default.
5354         */
5355         if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5356                 adapter->wol &= ~E1000_WUFC_MAG;
5357
5358         if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5359                 adapter->wol &= ~E1000_WUFC_MC;
5360         else {
5361                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5362                 rctl |= E1000_RCTL_MPE;
5363                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5364         }
5365
5366         if ((adapter->hw.mac.type == e1000_pchlan) ||
5367             (adapter->hw.mac.type == e1000_pch2lan)) {
5368                 if (em_enable_phy_wakeup(adapter))
5369                         return;
5370         } else {
5371                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5372                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5373         }
5374
5375         if (adapter->hw.phy.type == e1000_phy_igp_3)
5376                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5377
5378         /* Request PME */
5379         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5380         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5381         if (if_getcapenable(ifp) & IFCAP_WOL)
5382                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5383         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5384
5385         return;
5386 }
5387
5388 /*
5389 ** WOL in the newer chipset interfaces (pchlan)
5390 ** requires things to be copied into the PHY.
5391 */
5392 static int
5393 em_enable_phy_wakeup(struct adapter *adapter)
5394 {
5395         struct e1000_hw *hw = &adapter->hw;
5396         u32 mreg, ret = 0;
5397         u16 preg;
5398
5399         /* copy MAC RARs to PHY RARs */
5400         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5401
5402         /* copy MAC MTA to PHY MTA */
5403         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5404                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5405                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5406                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5407                     (u16)((mreg >> 16) & 0xFFFF));
5408         }
5409
5410         /* configure PHY Rx Control register */
5411         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5412         mreg = E1000_READ_REG(hw, E1000_RCTL);
5413         if (mreg & E1000_RCTL_UPE)
5414                 preg |= BM_RCTL_UPE;
5415         if (mreg & E1000_RCTL_MPE)
5416                 preg |= BM_RCTL_MPE;
5417         preg &= ~(BM_RCTL_MO_MASK);
5418         if (mreg & E1000_RCTL_MO_3)
5419                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5420                                 << BM_RCTL_MO_SHIFT);
5421         if (mreg & E1000_RCTL_BAM)
5422                 preg |= BM_RCTL_BAM;
5423         if (mreg & E1000_RCTL_PMCF)
5424                 preg |= BM_RCTL_PMCF;
5425         mreg = E1000_READ_REG(hw, E1000_CTRL);
5426         if (mreg & E1000_CTRL_RFCE)
5427                 preg |= BM_RCTL_RFCE;
5428         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5429
5430         /* enable PHY wakeup in MAC register */
5431         E1000_WRITE_REG(hw, E1000_WUC,
5432             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5433         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5434
5435         /* configure and enable PHY wakeup in PHY registers */
5436         e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
5437         e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);
5438
5439         /* activate PHY wakeup */
5440         ret = hw->phy.ops.acquire(hw);
5441         if (ret) {
5442                 device_printf(adapter->dev, "Could not acquire PHY\n");
5443                 return ret;
5444         }
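        /*
        ** The wakeup-enable bits live on a separate PHY page
        ** (BM_WUC_ENABLE_PAGE, page 769, per the message below), so
        ** select that page first and then read-modify-write the
        ** enable register through the raw MDIC accessors.
        */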
5445         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5446                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5447         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5448         if (ret) {
5449                 device_printf(adapter->dev, "Could not read PHY page 769\n");
5450                 goto out;
5451         }
5452         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5453         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5454         if (ret)
5455                 device_printf(adapter->dev, "Could not set PHY Host Wakeup bit\n");
5456 out:
5457         hw->phy.ops.release(hw);
5458
5459         return ret;
5460 }
5461
5462 static void
5463 em_led_func(void *arg, int onoff)
5464 {
5465         struct adapter  *adapter = arg;
5466  
5467         EM_CORE_LOCK(adapter);
5468         if (onoff) {
5469                 e1000_setup_led(&adapter->hw);
5470                 e1000_led_on(&adapter->hw);
5471         } else {
5472                 e1000_led_off(&adapter->hw);
5473                 e1000_cleanup_led(&adapter->hw);
5474         }
5475         EM_CORE_UNLOCK(adapter);
5476 }
5477
5478 /*
5479 ** Disable the L0s and L1 ASPM link states
5480 */
5481 static void
5482 em_disable_aspm(struct adapter *adapter)
5483 {
5484         int             base, reg;
5485         u16             link_cap, link_ctrl;
5486         device_t        dev = adapter->dev;
5487
5488         switch (adapter->hw.mac.type) {
5489                 case e1000_82573:
5490                 case e1000_82574:
5491                 case e1000_82583:
5492                         break;
5493                 default:
5494                         return;
5495         }
5496         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5497                 return;
5498         reg = base + PCIER_LINK_CAP;
5499         link_cap = pci_read_config(dev, reg, 2);
5500         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5501                 return;
5502         reg = base + PCIER_LINK_CTL;
5503         link_ctrl = pci_read_config(dev, reg, 2);
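        /*
        ** The ASPM Control field occupies the low two bits of the
        ** PCIe Link Control register: bit 0 enables L0s, bit 1
        ** enables L1.  Clearing both disables ASPM entirely.
        */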
5504         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5505         pci_write_config(dev, reg, link_ctrl, 2);
5506         return;
5507 }
5508
5509 /**********************************************************************
5510  *
5511  *  Update the board statistics counters.
5512  *
5513  **********************************************************************/
5514 static void
5515 em_update_stats_counters(struct adapter *adapter)
5516 {
5517
5518         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5519            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5520                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5521                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5522         }
5523         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5524         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5525         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5526         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5527
5528         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5529         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5530         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5531         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5532         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5533         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5534         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5535         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5536         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5537         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5538         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5539         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5540         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5541         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5542         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5543         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5544         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5545         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5546         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5547         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5548
5549         /* For the 64-bit byte counters the low dword must be read first. */
5550         /* Both registers clear on the read of the high dword */
5551
5552         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5553             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5554         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5555             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5556
5557         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5558         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5559         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5560         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5561         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5562
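        /*
        ** Only the high dwords of the total-octets counters are read
        ** here; per the note above, a coherent 64-bit snapshot would
        ** read the low dword first.
        */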
5563         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5564         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5565
5566         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5567         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5568         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5569         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5570         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5571         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5572         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5573         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5574         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5575         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5576
5577         /* Interrupt Counts */
5578
5579         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5580         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5581         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5582         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5583         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5584         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5585         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5586         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5587         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5588
5589         if (adapter->hw.mac.type >= e1000_82543) {
5590                 adapter->stats.algnerrc +=
5591                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5592                 adapter->stats.rxerrc +=
5593                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5594                 adapter->stats.tncrs +=
5595                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5596                 adapter->stats.cexterr +=
5597                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5598                 adapter->stats.tsctc +=
5599                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5600                 adapter->stats.tsctfc +=
5601                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5602         }
5603 }
5604
5605 static uint64_t
5606 em_get_counter(if_t ifp, ift_counter cnt)
5607 {
5608         struct adapter *adapter;
5609
5610         adapter = if_getsoftc(ifp);
5611
5612         switch (cnt) {
5613         case IFCOUNTER_COLLISIONS:
5614                 return (adapter->stats.colc);
5615         case IFCOUNTER_IERRORS:
5616                 return (adapter->dropped_pkts + adapter->stats.rxerrc +
5617                     adapter->stats.crcerrs + adapter->stats.algnerrc +
5618                     adapter->stats.ruc + adapter->stats.roc +
5619                     adapter->stats.mpc + adapter->stats.cexterr);
5620         case IFCOUNTER_OERRORS:
5621                 return (adapter->stats.ecol + adapter->stats.latecol +
5622                     adapter->watchdog_events);
5623         default:
5624                 return (if_get_counter_default(ifp, cnt));
5625         }
5626 }
5627
5628 /* Export a single 32-bit register via a read-only sysctl. */
5629 static int
5630 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5631 {
5632         struct adapter *adapter;
5633         u_int val;
5634
5635         adapter = oidp->oid_arg1;
5636         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5637         return (sysctl_handle_int(oidp, &val, 0, req));
5638 }
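/*
** Registers exported through this handler appear as read-only
** sysctl leaves, e.g. (assuming unit 0):
**
**      sysctl dev.em.0.device_control
**      sysctl dev.em.0.rx_control
*/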
5639
5640 /*
5641  * Add sysctl variables, one per statistic, to the system.
5642  */
5643 static void
5644 em_add_hw_stats(struct adapter *adapter)
5645 {
5646         device_t dev = adapter->dev;
5647
5648         struct tx_ring *txr = adapter->tx_rings;
5649         struct rx_ring *rxr = adapter->rx_rings;
5650
5651         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5652         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5653         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5654         struct e1000_hw_stats *stats = &adapter->stats;
5655
5656         struct sysctl_oid *stat_node, *queue_node, *int_node;
5657         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5658
5659 #define QUEUE_NAME_LEN 32
5660         char namebuf[QUEUE_NAME_LEN];
5661         
5662         /* Driver Statistics */
5663         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5664                         CTLFLAG_RD, &adapter->dropped_pkts,
5665                         "Driver dropped packets");
5666         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5667                         CTLFLAG_RD, &adapter->link_irq,
5668                         "Link MSIX IRQ Handled");
5669         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
5670                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5671                          "Defragmenting mbuf chain failed");
5672         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5673                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5674                         "Driver tx dma failure in xmit");
5675         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5676                         CTLFLAG_RD, &adapter->rx_overruns,
5677                         "RX overruns");
5678         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5679                         CTLFLAG_RD, &adapter->watchdog_events,
5680                         "Watchdog timeouts");
5681         
5682         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5683                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5684                         em_sysctl_reg_handler, "IU",
5685                         "Device Control Register");
5686         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5687                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5688                         em_sysctl_reg_handler, "IU",
5689                         "Receiver Control Register");
5690         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5691                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5692                         "Flow Control High Watermark");
5693         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5694                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5695                         "Flow Control Low Watermark");
5696
5697         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5698                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5699                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5700                                             CTLFLAG_RD, NULL, "Per-queue TX statistics");
5701                 queue_list = SYSCTL_CHILDREN(queue_node);
5702
5703                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5704                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5705                                 E1000_TDH(txr->me),
5706                                 em_sysctl_reg_handler, "IU",
5707                                 "Transmit Descriptor Head");
5708                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5709                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5710                                 E1000_TDT(txr->me),
5711                                 em_sysctl_reg_handler, "IU",
5712                                 "Transmit Descriptor Tail");
5713                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5714                                 CTLFLAG_RD, &txr->tx_irq,
5715                                 "Queue MSI-X Transmit Interrupts");
5716                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5717                                 CTLFLAG_RD, &txr->no_desc_avail,
5718                                 "Queue No Descriptor Available");
5719
5720                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5721                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5722                                             CTLFLAG_RD, NULL, "Per-queue RX statistics");
5723                 queue_list = SYSCTL_CHILDREN(queue_node);
5724
5725                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5726                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5727                                 E1000_RDH(rxr->me),
5728                                 em_sysctl_reg_handler, "IU",
5729                                 "Receive Descriptor Head");
5730                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5731                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5732                                 E1000_RDT(rxr->me),
5733                                 em_sysctl_reg_handler, "IU",
5734                                 "Receive Descriptor Tail");
5735                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5736                                 CTLFLAG_RD, &rxr->rx_irq,
5737                                 "Queue MSI-X Receive Interrupts");
5738         }
5739
5740         /* MAC stats get their own sub node */
5741
5742         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5743                                     CTLFLAG_RD, NULL, "Statistics");
5744         stat_list = SYSCTL_CHILDREN(stat_node);
5745
5746         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5747                         CTLFLAG_RD, &stats->ecol,
5748                         "Excessive collisions");
5749         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5750                         CTLFLAG_RD, &stats->scc,
5751                         "Single collisions");
5752         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5753                         CTLFLAG_RD, &stats->mcc,
5754                         "Multiple collisions");
5755         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5756                         CTLFLAG_RD, &stats->latecol,
5757                         "Late collisions");
5758         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5759                         CTLFLAG_RD, &stats->colc,
5760                         "Collision Count");
5761         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5762                         CTLFLAG_RD, &adapter->stats.symerrs,
5763                         "Symbol Errors");
5764         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5765                         CTLFLAG_RD, &adapter->stats.sec,
5766                         "Sequence Errors");
5767         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5768                         CTLFLAG_RD, &adapter->stats.dc,
5769                         "Defer Count");
5770         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5771                         CTLFLAG_RD, &adapter->stats.mpc,
5772                         "Missed Packets");
5773         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5774                         CTLFLAG_RD, &adapter->stats.rnbc,
5775                         "Receive No Buffers");
5776         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5777                         CTLFLAG_RD, &adapter->stats.ruc,
5778                         "Receive Undersize");
5779         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5780                         CTLFLAG_RD, &adapter->stats.rfc,
5781                         "Fragmented Packets Received");
5782         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5783                         CTLFLAG_RD, &adapter->stats.roc,
5784                         "Oversized Packets Received");
5785         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5786                         CTLFLAG_RD, &adapter->stats.rjc,
5787                         "Received Jabber");
5788         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5789                         CTLFLAG_RD, &adapter->stats.rxerrc,
5790                         "Receive Errors");
5791         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5792                         CTLFLAG_RD, &adapter->stats.crcerrs,
5793                         "CRC errors");
5794         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5795                         CTLFLAG_RD, &adapter->stats.algnerrc,
5796                         "Alignment Errors");
5797         /* On 82575 these are collision counts */
5798         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5799                         CTLFLAG_RD, &adapter->stats.cexterr,
5800                         "Collision/Carrier extension errors");
5801         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5802                         CTLFLAG_RD, &adapter->stats.xonrxc,
5803                         "XON Received");
5804         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5805                         CTLFLAG_RD, &adapter->stats.xontxc,
5806                         "XON Transmitted");
5807         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5808                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5809                         "XOFF Received");
5810         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5811                         CTLFLAG_RD, &adapter->stats.xofftxc,
5812                         "XOFF Transmitted");
5813
5814         /* Packet Reception Stats */
5815         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5816                         CTLFLAG_RD, &adapter->stats.tpr,
5817                         "Total Packets Received");
5818         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5819                         CTLFLAG_RD, &adapter->stats.gprc,
5820                         "Good Packets Received");
5821         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5822                         CTLFLAG_RD, &adapter->stats.bprc,
5823                         "Broadcast Packets Received");
5824         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5825                         CTLFLAG_RD, &adapter->stats.mprc,
5826                         "Multicast Packets Received");
5827         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5828                         CTLFLAG_RD, &adapter->stats.prc64,
5829                         "64 byte frames received");
5830         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5831                         CTLFLAG_RD, &adapter->stats.prc127,
5832                         "65-127 byte frames received");
5833         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5834                         CTLFLAG_RD, &adapter->stats.prc255,
5835                         "128-255 byte frames received");
5836         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5837                         CTLFLAG_RD, &adapter->stats.prc511,
5838                         "256-511 byte frames received");
5839         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5840                         CTLFLAG_RD, &adapter->stats.prc1023,
5841                         "512-1023 byte frames received");
5842         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5843                         CTLFLAG_RD, &adapter->stats.prc1522,
5844                         "1024-1522 byte frames received");
5845         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5846                         CTLFLAG_RD, &adapter->stats.gorc, 
5847                         "Good Octets Received"); 
5848
5849         /* Packet Transmission Stats */
5850         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5851                         CTLFLAG_RD, &adapter->stats.gotc, 
5852                         "Good Octets Transmitted"); 
5853         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5854                         CTLFLAG_RD, &adapter->stats.tpt,
5855                         "Total Packets Transmitted");
5856         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5857                         CTLFLAG_RD, &adapter->stats.gptc,
5858                         "Good Packets Transmitted");
5859         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5860                         CTLFLAG_RD, &adapter->stats.bptc,
5861                         "Broadcast Packets Transmitted");
5862         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5863                         CTLFLAG_RD, &adapter->stats.mptc,
5864                         "Multicast Packets Transmitted");
5865         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5866                         CTLFLAG_RD, &adapter->stats.ptc64,
5867                         "64 byte frames transmitted");
5868         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5869                         CTLFLAG_RD, &adapter->stats.ptc127,
5870                         "65-127 byte frames transmitted");
5871         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5872                         CTLFLAG_RD, &adapter->stats.ptc255,
5873                         "128-255 byte frames transmitted");
5874         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5875                         CTLFLAG_RD, &adapter->stats.ptc511,
5876                         "256-511 byte frames transmitted");
5877         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5878                         CTLFLAG_RD, &adapter->stats.ptc1023,
5879                         "512-1023 byte frames transmitted");
5880         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5881                         CTLFLAG_RD, &adapter->stats.ptc1522,
5882                         "1024-1522 byte frames transmitted");
5883         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5884                         CTLFLAG_RD, &adapter->stats.tsctc,
5885                         "TSO Contexts Transmitted");
5886         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5887                         CTLFLAG_RD, &adapter->stats.tsctfc,
5888                         "TSO Contexts Failed");
5889
5890
5891         /* Interrupt Stats */
5892
5893         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5894                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5895         int_list = SYSCTL_CHILDREN(int_node);
5896
5897         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5898                         CTLFLAG_RD, &adapter->stats.iac,
5899                         "Interrupt Assertion Count");
5900
5901         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5902                         CTLFLAG_RD, &adapter->stats.icrxptc,
5903                         "Interrupt Cause Rx Pkt Timer Expire Count");
5904
5905         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5906                         CTLFLAG_RD, &adapter->stats.icrxatc,
5907                         "Interrupt Cause Rx Abs Timer Expire Count");
5908
5909         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5910                         CTLFLAG_RD, &adapter->stats.ictxptc,
5911                         "Interrupt Cause Tx Pkt Timer Expire Count");
5912
5913         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5914                         CTLFLAG_RD, &adapter->stats.ictxatc,
5915                         "Interrupt Cause Tx Abs Timer Expire Count");
5916
5917         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5918                         CTLFLAG_RD, &adapter->stats.ictxqec,
5919                         "Interrupt Cause Tx Queue Empty Count");
5920
5921         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5922                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5923                         "Interrupt Cause Tx Queue Min Thresh Count");
5924
5925         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5926                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5927                         "Interrupt Cause Rx Desc Min Thresh Count");
5928
5929         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5930                         CTLFLAG_RD, &adapter->stats.icrxoc,
5931                         "Interrupt Cause Receiver Overrun Count");
5932 }
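/*
** Everything registered above is browsable from userland, e.g.
** (assuming unit 0):
**
**      sysctl dev.em.0.mac_stats       # MAC counters
**      sysctl dev.em.0.queue_tx_0      # first TX queue
**      sysctl dev.em.0.interrupts      # interrupt cause counters
*/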
5933
5934 /**********************************************************************
5935  *
5936  *  This routine provides a way to dump out the adapter eeprom,
5937  *  often a useful debug/service tool. This only dumps the first
5938  *  32 words; everything of interest lives in that range.
5939  *
5940  **********************************************************************/
5941 static int
5942 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5943 {
5944         struct adapter *adapter = (struct adapter *)arg1;
5945         int error;
5946         int result;
5947
5948         result = -1;
5949         error = sysctl_handle_int(oidp, &result, 0, req);
5950
5951         if (error || !req->newptr)
5952                 return (error);
5953
5954         /*
5955          * This value will cause a hex dump of the
5956          * first 32 16-bit words of the EEPROM to
5957          * the screen.
5958          */
5959         if (result == 1)
5960                 em_print_nvm_info(adapter);
5961
5962         return (error);
5963 }
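/*
** Example, assuming the handler is registered as "nvm" (as done
** during attach elsewhere in this file) on unit 0:
**
**      sysctl dev.em.0.nvm=1           # hex-dump the first 32 words
*/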
5964
5965 static void
5966 em_print_nvm_info(struct adapter *adapter)
5967 {
5968         u16     eeprom_data;
5969         int     i, j, row = 0;
5970
5971         /* It's a bit crude, but it gets the job done */
5972         printf("\nInterface EEPROM Dump:\n");
5973         printf("Offset\n0x0000  ");
5974         for (i = 0, j = 0; i < 32; i++, j++) {
5975                 if (j == 8) { /* Make the offset block */
5976                         j = 0; ++row;
5977                         printf("\n0x00%x0  ", row);
5978                 }
5979                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5980                 printf("%04x ", eeprom_data);
5981         }
5982         printf("\n");
5983 }
5984
5985 static int
5986 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5987 {
5988         struct em_int_delay_info *info;
5989         struct adapter *adapter;
5990         u32 regval;
5991         int error, usecs, ticks;
5992
5993         info = (struct em_int_delay_info *)arg1;
5994         usecs = info->value;
5995         error = sysctl_handle_int(oidp, &usecs, 0, req);
5996         if (error != 0 || req->newptr == NULL)
5997                 return (error);
5998         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5999                 return (EINVAL);
6000         info->value = usecs;
6001         ticks = EM_USECS_TO_TICKS(usecs);
6002         if (info->offset == E1000_ITR)  /* units are 256ns here */
6003                 ticks *= 4;
6004
6005         adapter = info->adapter;
6006         
6007         EM_CORE_LOCK(adapter);
6008         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6009         regval = (regval & ~0xffff) | (ticks & 0xffff);
6010         /* Handle a few special cases. */
6011         switch (info->offset) {
6012         case E1000_RDTR:
6013                 break;
6014         case E1000_TIDV:
6015                 if (ticks == 0) {
6016                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6017                         /* Don't write 0 into the TIDV register. */
6018                         regval++;
6019                 } else
6020                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6021                 break;
6022         }
6023         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6024         EM_CORE_UNLOCK(adapter);
6025         return (0);
6026 }
6027
6028 static void
6029 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6030         const char *description, struct em_int_delay_info *info,
6031         int offset, int value)
6032 {
6033         info->adapter = adapter;
6034         info->offset = offset;
6035         info->value = value;
6036         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6037             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6038             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6039             info, 0, em_sysctl_int_delay, "I", description);
6040 }
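/*
** A typical registration, as performed during attach elsewhere in
** this file (names shown are illustrative):
**
**      em_add_int_delay_sysctl(adapter, "rx_int_delay",
**          "receive interrupt delay in usecs",
**          &adapter->rx_int_delay,
**          E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
*/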
6041
6042 static void
6043 em_set_sysctl_value(struct adapter *adapter, const char *name,
6044         const char *description, int *limit, int value)
6045 {
6046         *limit = value;
6047         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6048             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6049             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6050 }
6051
6052
6053 /*
6054 ** Set flow control using sysctl:
6055 ** Flow control values:
6056 **      0 - off
6057 **      1 - rx pause
6058 **      2 - tx pause
6059 **      3 - full
6060 */
6061 static int
6062 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6063 {       
6064         int             error, input;
6065         struct adapter  *adapter = (struct adapter *) arg1;
6066
6067         input = adapter->fc; /* start from the current setting */
6068         error = sysctl_handle_int(oidp, &input, 0, req);
6069
6070         if ((error) || (req->newptr == NULL))
6071                 return (error);
6072                 
6073         if (input == adapter->fc) /* no change? */
6074                 return (error);
6075
6076         switch (input) {
6077                 case e1000_fc_rx_pause:
6078                 case e1000_fc_tx_pause:
6079                 case e1000_fc_full:
6080                 case e1000_fc_none:
6081                         adapter->hw.fc.requested_mode = input;
6082                         adapter->fc = input;
6083                         break;
6084                 default:
6085                         /* Do nothing */
6086                         return (error);
6087         }
6088
6089         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6090         e1000_force_mac_fc(&adapter->hw);
6091         return (error);
6092 }
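/*
** Example, assuming the handler is registered as "fc" on unit 0:
**
**      sysctl dev.em.0.fc=3            # full flow control
**      sysctl dev.em.0.fc=0            # flow control off
*/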
6093
6094 /*
6095 ** Manage Energy Efficient Ethernet:
6096 ** Control values:
6097 **     0 - EEE enabled, 1 - EEE disabled
6098 */
6099 static int
6100 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6101 {
6102         struct adapter *adapter = (struct adapter *) arg1;
6103         int             error, value;
6104
6105         value = adapter->hw.dev_spec.ich8lan.eee_disable;
6106         error = sysctl_handle_int(oidp, &value, 0, req);
6107         if (error || req->newptr == NULL)
6108                 return (error);
6109         EM_CORE_LOCK(adapter);
6110         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6111         em_init_locked(adapter);
6112         EM_CORE_UNLOCK(adapter);
6113         return (0);
6114 }
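/*
** Example, assuming the handler is registered as "eee_control" on
** unit 0:
**
**      sysctl dev.em.0.eee_control=1   # disable EEE
**      sysctl dev.em.0.eee_control=0   # enable EEE
*/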
6115
6116 static int
6117 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6118 {
6119         struct adapter *adapter;
6120         int error;
6121         int result;
6122
6123         result = -1;
6124         error = sysctl_handle_int(oidp, &result, 0, req);
6125
6126         if (error || !req->newptr)
6127                 return (error);
6128
6129         if (result == 1) {
6130                 adapter = (struct adapter *)arg1;
6131                 em_print_debug_info(adapter);
6132         }
6133
6134         return (error);
6135 }
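/*
** Example, assuming the handler is registered as "debug" on unit 0:
**
**      sysctl dev.em.0.debug=1         # dump queue state to console
*/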
6136
6137 /*
6138 ** This routine is meant to be fluid, add whatever is
6139 ** needed for debugging a problem.  -jfv
6140 */
6141 static void
6142 em_print_debug_info(struct adapter *adapter)
6143 {
6144         device_t dev = adapter->dev;
6145         struct tx_ring *txr = adapter->tx_rings;
6146         struct rx_ring *rxr = adapter->rx_rings;
6147
6148         if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6149                 printf("Interface is RUNNING ");
6150         else
6151                 printf("Interface is NOT RUNNING ");
6152
6153         if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6154                 printf("and INACTIVE\n");
6155         else
6156                 printf("and ACTIVE\n");
6157
6158         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6159                 device_printf(dev, "TX Queue %d ------\n", i);
6160                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6161                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6162                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6163                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6164                 device_printf(dev, "TX descriptors avail = %d\n",
6165                         txr->tx_avail);
6166                 device_printf(dev, "Tx Descriptors avail failure = %lu\n",
6167                         txr->no_desc_avail);
6168                 device_printf(dev, "RX Queue %d ------\n", i);
6169                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6170                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6171                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6172                 device_printf(dev, "RX discarded packets = %lu\n",
6173                         rxr->rx_discarded);
6174                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6175                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6176         }
6177 }
6178
6179 #ifdef EM_MULTIQUEUE
6180 /*
6181  * 82574 only:
6182  * Write a new value to the EEPROM increasing the number of MSIX
6183  * vectors from 3 to 5, for proper multiqueue support.
6184  */
6185 static void
6186 em_enable_vectors_82574(struct adapter *adapter)
6187 {
6188         struct e1000_hw *hw = &adapter->hw;
6189         device_t dev = adapter->dev;
6190         u16 edata;
6191
6192         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6193         device_printf(dev, "Current cap: %#06x\n", edata);
6194         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6195                 device_printf(dev, "Writing to eeprom: increasing "
6196                     "reported MSIX vectors from 3 to 5...\n");
6197                 edata &= ~(EM_NVM_MSIX_N_MASK);
6198                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6199                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6200                 e1000_update_nvm_checksum(hw);
6201                 device_printf(dev, "Writing to eeprom: done\n");
6202         }
6203 }
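/*
** Note: the MSIX vector count is loaded from the NVM at power-on,
** so the rewritten value typically takes effect only after the
** device has been power cycled.
*/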
6204 #endif
6205
6206 #ifdef DDB
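/*
** These commands are run from the ddb(4) prompt, e.g.:
**
**      db> em_reset_dev
**      db> em_dump_queue
*/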
6207 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6208 {
6209         devclass_t      dc;
6210         int max_em;
6211
6212         dc = devclass_find("em");
6213         max_em = devclass_get_maxunit(dc);
6214
6215         for (int index = 0; index < max_em; index++) {
6216                 device_t dev;
6217                 dev = devclass_get_device(dc, index);
6218                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6219                         struct adapter *adapter = device_get_softc(dev);
6220                         EM_CORE_LOCK(adapter);
6221                         em_init_locked(adapter);
6222                         EM_CORE_UNLOCK(adapter);
6223                 }
6224         }
6225 }
6226 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6227 {
6228         devclass_t      dc;
6229         int max_em;
6230
6231         dc = devclass_find("em");
6232         max_em = devclass_get_maxunit(dc);
6233
6234         for (int index = 0; index < max_em; index++) {
6235                 device_t dev;
6236                 dev = devclass_get_device(dc, index);
6237                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6238                         em_print_debug_info(device_get_softc(dev));
6239         }
6240
6241 }
6242 #endif