/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};
206 /*********************************************************************
207  *  Table of branding strings for all supported NICs.
208  *********************************************************************/
209
210 static char *em_strings[] = {
211         "Intel(R) PRO/1000 Network Connection"
212 };
213
214 /*********************************************************************
215  *  Function prototypes
216  *********************************************************************/
217 static int      em_probe(device_t);
218 static int      em_attach(device_t);
219 static int      em_detach(device_t);
220 static int      em_shutdown(device_t);
221 static int      em_suspend(device_t);
222 static int      em_resume(device_t);
223 #ifdef EM_MULTIQUEUE
224 static int      em_mq_start(if_t, struct mbuf *);
225 static int      em_mq_start_locked(if_t,
226                     struct tx_ring *);
227 static void     em_qflush(if_t);
228 #else
229 static void     em_start(if_t);
230 static void     em_start_locked(if_t, struct tx_ring *);
231 #endif
232 static int      em_ioctl(if_t, u_long, caddr_t);
233 static uint64_t em_get_counter(if_t, ift_counter);
234 static void     em_init(void *);
235 static void     em_init_locked(struct adapter *);
236 static void     em_stop(void *);
237 static void     em_media_status(if_t, struct ifmediareq *);
238 static int      em_media_change(if_t);
239 static void     em_identify_hardware(struct adapter *);
240 static int      em_allocate_pci_resources(struct adapter *);
241 static int      em_allocate_legacy(struct adapter *);
242 static int      em_allocate_msix(struct adapter *);
243 static int      em_allocate_queues(struct adapter *);
244 static int      em_setup_msix(struct adapter *);
245 static void     em_free_pci_resources(struct adapter *);
246 static void     em_local_timer(void *);
247 static void     em_reset(struct adapter *);
248 static int      em_setup_interface(device_t, struct adapter *);
249 static void     em_flush_desc_rings(struct adapter *);
250
251 static void     em_setup_transmit_structures(struct adapter *);
252 static void     em_initialize_transmit_unit(struct adapter *);
253 static int      em_allocate_transmit_buffers(struct tx_ring *);
254 static void     em_free_transmit_structures(struct adapter *);
255 static void     em_free_transmit_buffers(struct tx_ring *);
256
257 static int      em_setup_receive_structures(struct adapter *);
258 static int      em_allocate_receive_buffers(struct rx_ring *);
259 static void     em_initialize_receive_unit(struct adapter *);
260 static void     em_free_receive_structures(struct adapter *);
261 static void     em_free_receive_buffers(struct rx_ring *);
262
263 static void     em_enable_intr(struct adapter *);
264 static void     em_disable_intr(struct adapter *);
265 static void     em_update_stats_counters(struct adapter *);
266 static void     em_add_hw_stats(struct adapter *adapter);
267 static void     em_txeof(struct tx_ring *);
268 static bool     em_rxeof(struct rx_ring *, int, int *);
269 #ifndef __NO_STRICT_ALIGNMENT
270 static int      em_fixup_rx(struct rx_ring *);
271 #endif
272 static void     em_setup_rxdesc(union e1000_rx_desc_extended *,
273                     const struct em_rxbuffer *rxbuf);
274 static void     em_receive_checksum(uint32_t status, struct mbuf *);
275 static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
276                     struct ip *, u32 *, u32 *);
277 static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
278                     struct tcphdr *, u32 *, u32 *);
279 static void     em_set_promisc(struct adapter *);
280 static void     em_disable_promisc(struct adapter *);
281 static void     em_set_multi(struct adapter *);
282 static void     em_update_link_status(struct adapter *);
283 static void     em_refresh_mbufs(struct rx_ring *, int);
284 static void     em_register_vlan(void *, if_t, u16);
285 static void     em_unregister_vlan(void *, if_t, u16);
286 static void     em_setup_vlan_hw_support(struct adapter *);
287 static int      em_xmit(struct tx_ring *, struct mbuf **);
288 static int      em_dma_malloc(struct adapter *, bus_size_t,
289                     struct em_dma_alloc *, int);
290 static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
291 static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
292 static void     em_print_nvm_info(struct adapter *);
293 static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
294 static void     em_print_debug_info(struct adapter *);
295 static int      em_is_valid_ether_addr(u8 *);
296 static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
297 static void     em_add_int_delay_sysctl(struct adapter *, const char *,
298                     const char *, struct em_int_delay_info *, int, int);
299 /* Management and WOL Support */
300 static void     em_init_manageability(struct adapter *);
301 static void     em_release_manageability(struct adapter *);
302 static void     em_get_hw_control(struct adapter *);
303 static void     em_release_hw_control(struct adapter *);
304 static void     em_get_wakeup(device_t);
305 static void     em_enable_wakeup(device_t);
306 static int      em_enable_phy_wakeup(struct adapter *);
307 static void     em_led_func(void *, int);
308 static void     em_disable_aspm(struct adapter *);
309
310 static int      em_irq_fast(void *);
311
312 /* MSIX handlers */
313 static void     em_msix_tx(void *);
314 static void     em_msix_rx(void *);
315 static void     em_msix_link(void *);
316 static void     em_handle_tx(void *context, int pending);
317 static void     em_handle_rx(void *context, int pending);
318 static void     em_handle_link(void *context, int pending);
319
320 #ifdef EM_MULTIQUEUE
321 static void     em_enable_vectors_82574(struct adapter *);
322 #endif
323
324 static void     em_set_sysctl_value(struct adapter *, const char *,
325                     const char *, int *, int);
326 static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
327 static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);
328
329 static __inline void em_rx_discard(struct rx_ring *, int);
330
331 #ifdef DEVICE_POLLING
332 static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
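
/*
 * Worked arithmetic (informational note, not from the original source):
 * the interrupt-delay registers count 1.024 us hardware ticks, which is
 * what the rounding in EM_TICKS_TO_USECS()/EM_USECS_TO_TICKS() converts,
 * e.g. EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks.
 * The ITR register instead counts 256 ns units, so DEFAULT_ITR =
 * 10^9 / (8000 * 256) = 488 units, i.e. roughly one interrupt per 125 us,
 * capping the rate near MAX_INTS_PER_SEC.
 */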

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

#define TSO_WORKAROUND  4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");
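
/*
 * Usage sketch (illustrative values only): CTLFLAG_RDTUN sysctls such as
 * the above are boot-time loader tunables, so they would typically be set
 * from /boot/loader.conf before the driver attaches, e.g.:
 *
 *   hw.em.rx_int_delay="32"
 *   hw.em.rx_abs_int_delay="64"
 */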

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** each time a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy Efficient Ethernet - default to OFF.  Note: this value is
 * copied into hw->dev_spec.ich8lan.eee_disable at attach time, so
 * 1 means EEE is disabled.
 */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded for an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to map the flash memory,
        ** and this must happen after the MAC is identified.
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }
        /*
        ** In the new SPT device the flash is not a separate BAR;
        ** rather, it also lives in BAR0, so use the same tag and an
        ** offset handle for the FLASH read/write macros in the
        ** shared code.
        */
        else if (hw->mac.type == e1000_pch_spt) {
                adapter->osdep.flash_bus_space_tag =
                    adapter->osdep.mem_bus_space_tag;
                adapter->osdep.flash_bus_space_handle =
                    adapter->osdep.mem_bus_space_handle
                    + E1000_FLASH_BASE_ADDR;
        }

        /* Do Shared Code initialization */
        error = e1000_setup_init_funcs(hw, TRUE);
        if (error) {
                device_printf(dev, "Setup of Shared code failed, error %d\n",
                    error);
                error = ENXIO;
                goto err_pci;
        }

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  It
         * must not exceed the hardware maximum, and the ring size in
         * bytes must be a multiple of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;
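
        /*
         * Worked example (informational; header values assumed from
         * if_em.h): both the legacy TX descriptor and the extended RX
         * descriptor are 16 bytes, and EM_DBA_ALIGN is 128, so a ring
         * size passes the alignment test above only when it is a
         * multiple of 128 / 16 = 8 descriptors (and it must also fall
         * within the EM_MIN_TXD/EM_MAX_TXD or RXD bounds).
         */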

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is important for
        ** reading the NVM and the MAC address from it.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);

        /*
        ** Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != (void *)NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        if_t ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (if_vlantrunkinuse(ifp)) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (if_getcapenable(ifp) & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        if_t ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((if_getflags(ifp) & IFF_UP) &&
            (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!if_sendq_empty(ifp))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!if_sendq_empty(ifp)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
                        break;
                }
                m_head = if_dequeue(ifp);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        if_sendq_prepend(ifp, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

        }

        return;
}

static void
em_start(if_t ifp)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;

        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than do an immediate send.  It is this deferral, as much
 *  as the multiple TX queues themselves, that is the advantage in
 *  this driver.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;
        unsigned int    i, error;

        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        error = drbr_enqueue(ifp, txr->br, m);
        if (error)
                return (error);

        if (EM_TX_TRYLOCK(txr)) {
                em_mq_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(txr->tq, &txr->tx_task);

        return (0);
}
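
/*
 * Design note (informational): if EM_TX_TRYLOCK() fails above, the mbuf
 * has still been enqueued on the buf_ring; the deferred task merely
 * guarantees that the ring is drained soon, without the caller blocking
 * on a contended TX lock.
 */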

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        EM_TX_LOCK_ASSERT(txr);

        if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
                if (next->m_flags & M_MCAST)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        if_setflagbits(ifp,IFF_UP,0);
                        if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(if_getflags(ifp) & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_pch_spt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
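                /*
                 * Worked example (informational): with the 9234-byte
                 * frame limit above, the largest acceptable MTU is
                 * 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4) = 9216.
                 */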
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                if_setmtu(ifp, ifr->ifr_mtu);
                adapter->hw.mac.max_frame_size =
                    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
                if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
                        em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (if_getflags(ifp) & IFF_UP) {
                        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                                if ((if_getflags(ifp) ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = if_getflags(ifp);
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                if_setcapenablebit(ifp, IFCAP_POLLING, 0);
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                if_setcapenablebit(ifp, 0, IFCAP_POLLING);
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        if_togglecapenable(ifp,IFCAP_HWCSUM);
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        if_togglecapenable(ifp,IFCAP_TSO4);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                if_togglecapenable(ifp, IFCAP_WOL_MCAST);
                        if (mask & IFCAP_WOL_MAGIC)
                                if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
                }
                if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
                        em_init(adapter);
                if_vlancap(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get the hardware to a consistent state.
 *
 **********************************************************************/

1356 static void
1357 em_init_locked(struct adapter *adapter)
1358 {
1359         if_t ifp = adapter->ifp;
1360         device_t        dev = adapter->dev;
1361
1362         INIT_DEBUGOUT("em_init: begin");
1363
1364         EM_CORE_LOCK_ASSERT(adapter);
1365
1366         em_disable_intr(adapter);
1367         callout_stop(&adapter->timer);
1368
1369         /* Get the latest MAC address; the user may have set a LAA */
1370         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1371               ETHER_ADDR_LEN);
1372
1373         /* Put the address into the Receive Address Array */
1374         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1375
1376         /*
1377          * With the 82571 adapter, RAR[0] may be overwritten
1378          * when the other port is reset.  We keep a duplicate
1379          * in RAR[14] for that eventuality, which ensures that
1380          * the interface continues to function.
1381          */
1382         if (adapter->hw.mac.type == e1000_82571) {
1383                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1384                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1385                     E1000_RAR_ENTRIES - 1);
1386         }
1387
1388         /* Initialize the hardware */
1389         em_reset(adapter);
1390         em_update_link_status(adapter);
1391
1392         /* Setup VLAN support, basic and offload if available */
1393         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
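        /* (ETHERTYPE_VLAN is 0x8100, the standard 802.1Q TPID.) */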
1394
1395         /* Set hardware offload abilities */
1396         if_clearhwassist(ifp);
1397         if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1398                 if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1399         /*
1400         ** TSO has proven to be problematic when not running at
1401         ** full gigabit speed, so disable the assist automatically
1402         ** at lower speeds.  -jfv
1403         */
1404         if (if_getcapenable(ifp) & IFCAP_TSO4) {
1405                 if (adapter->link_speed == SPEED_1000)
1406                         if_sethwassistbits(ifp, CSUM_TSO, 0);
1407         }
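        /*
        ** Example: after autonegotiating to 100 Mbps, CSUM_TSO is left
        ** cleared above, so the stack segments in software until the
        ** link returns to 1000 Mbps.
        */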
1408
1409         /* Configure for OS presence */
1410         em_init_manageability(adapter);
1411
1412         /* Prepare transmit descriptors and buffers */
1413         em_setup_transmit_structures(adapter);
1414         em_initialize_transmit_unit(adapter);
1415
1416         /* Setup Multicast table */
1417         em_set_multi(adapter);
1418
1419         /*
1420         ** Figure out the desired mbuf
1421         ** cluster pool for jumbo frames
1422         */
1423         if (adapter->hw.mac.max_frame_size <= 2048)
1424                 adapter->rx_mbuf_sz = MCLBYTES;
1425         else if (adapter->hw.mac.max_frame_size <= 4096)
1426                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1427         else
1428                 adapter->rx_mbuf_sz = MJUM9BYTES;
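        /*
        ** For example, a standard 1500-byte MTU gives a max frame of
        ** 1518 bytes (payload + ethernet header + CRC) and selects 2KB
        ** MCLBYTES clusters, while a 9000-byte jumbo MTU (9018-byte
        ** frames) selects 9KB MJUM9BYTES clusters.
        */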
1429
1430         /* Prepare receive descriptors and buffers */
1431         if (em_setup_receive_structures(adapter)) {
1432                 device_printf(dev, "Could not setup receive structures\n");
1433                 em_stop(adapter);
1434                 return;
1435         }
1436         em_initialize_receive_unit(adapter);
1437
1438         /* Use real VLAN Filter support? */
1439         if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1440                 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1441                         /* Use real VLAN Filter support */
1442                         em_setup_vlan_hw_support(adapter);
1443                 else {
1444                         u32 ctrl;
1445                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1446                         ctrl |= E1000_CTRL_VME;
1447                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1448                 }
1449         }
1450
1451         /* Don't lose promiscuous settings */
1452         em_set_promisc(adapter);
1453
1454         /* Set the interface as ACTIVE */
1455         if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1456
1457         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1458         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1459
1460         /* MSI/X configuration for 82574 */
1461         if (adapter->hw.mac.type == e1000_82574) {
1462                 int tmp;
1463                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1464                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1465                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1466                 /* Set the IVAR - interrupt vector routing. */
1467                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1468         }
1469
1470 #ifdef DEVICE_POLLING
1471         /*
1472          * Only enable interrupts if we are not polling; make sure
1473          * they are off otherwise.
1474          */
1475         if (if_getcapenable(ifp) & IFCAP_POLLING)
1476                 em_disable_intr(adapter);
1477         else
1478 #endif /* DEVICE_POLLING */
1479                 em_enable_intr(adapter);
1480
1481         /* AMT based hardware can now take control from firmware */
1482         if (adapter->has_manage && adapter->has_amt)
1483                 em_get_hw_control(adapter);
1484 }
1485
1486 static void
1487 em_init(void *arg)
1488 {
1489         struct adapter *adapter = arg;
1490
1491         EM_CORE_LOCK(adapter);
1492         em_init_locked(adapter);
1493         EM_CORE_UNLOCK(adapter);
1494 }
1495
1496
1497 #ifdef DEVICE_POLLING
1498 /*********************************************************************
1499  *
1500  *  Legacy polling routine: note this only works with a single queue
1501  *
1502  *********************************************************************/
1503 static int
1504 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1505 {
1506         struct adapter *adapter = if_getsoftc(ifp);
1507         struct tx_ring  *txr = adapter->tx_rings;
1508         struct rx_ring  *rxr = adapter->rx_rings;
1509         u32             reg_icr;
1510         int             rx_done;
1511
1512         EM_CORE_LOCK(adapter);
1513         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1514                 EM_CORE_UNLOCK(adapter);
1515                 return (0);
1516         }
1517
1518         if (cmd == POLL_AND_CHECK_STATUS) {
1519                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1520                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1521                         callout_stop(&adapter->timer);
1522                         adapter->hw.mac.get_link_status = 1;
1523                         em_update_link_status(adapter);
1524                         callout_reset(&adapter->timer, hz,
1525                             em_local_timer, adapter);
1526                 }
1527         }
1528         EM_CORE_UNLOCK(adapter);
1529
1530         em_rxeof(rxr, count, &rx_done);
1531
1532         EM_TX_LOCK(txr);
1533         em_txeof(txr);
1534 #ifdef EM_MULTIQUEUE
1535         if (!drbr_empty(ifp, txr->br))
1536                 em_mq_start_locked(ifp, txr);
1537 #else
1538         if (!if_sendq_empty(ifp))
1539                 em_start_locked(ifp, txr);
1540 #endif
1541         EM_TX_UNLOCK(txr);
1542
1543         return (rx_done);
1544 }
1545 #endif /* DEVICE_POLLING */
1546
1547
1548 /*********************************************************************
1549  *
1550  *  Fast Legacy/MSI Combined Interrupt Service routine  
1551  *
1552  *********************************************************************/
1553 static int
1554 em_irq_fast(void *arg)
1555 {
1556         struct adapter  *adapter = arg;
1557         if_t ifp;
1558         u32             reg_icr;
1559
1560         ifp = adapter->ifp;
1561
1562         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1563
1564         /* Hot eject?  */
1565         if (reg_icr == 0xffffffff)
1566                 return FILTER_STRAY;
1567
1568         /* Definitely not our interrupt.  */
1569         if (reg_icr == 0x0)
1570                 return FILTER_STRAY;
1571
1572         /*
1573          * Starting with the 82571 chip, bit 31 should be used to
1574          * determine whether the interrupt belongs to us.
1575          */
1576         if (adapter->hw.mac.type >= e1000_82571 &&
1577             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1578                 return FILTER_STRAY;
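        /* (E1000_ICR_INT_ASSERTED is bit 31, i.e. 0x80000000.) */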
1579
1580         em_disable_intr(adapter);
1581         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1582
1583         /* Link status change */
1584         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1585                 adapter->hw.mac.get_link_status = 1;
1586                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1587         }
1588
1589         if (reg_icr & E1000_ICR_RXO)
1590                 adapter->rx_overruns++;
1591         return FILTER_HANDLED;
1592 }
1593
1594 /* Combined RX/TX handler, used by Legacy and MSI */
1595 static void
1596 em_handle_que(void *context, int pending)
1597 {
1598         struct adapter  *adapter = context;
1599         if_t ifp = adapter->ifp;
1600         struct tx_ring  *txr = adapter->tx_rings;
1601         struct rx_ring  *rxr = adapter->rx_rings;
1602
1603         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1604                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1605
1606                 EM_TX_LOCK(txr);
1607                 em_txeof(txr);
1608 #ifdef EM_MULTIQUEUE
1609                 if (!drbr_empty(ifp, txr->br))
1610                         em_mq_start_locked(ifp, txr);
1611 #else
1612                 if (!if_sendq_empty(ifp))
1613                         em_start_locked(ifp, txr);
1614 #endif
1615                 EM_TX_UNLOCK(txr);
1616                 if (more) {
1617                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1618                         return;
1619                 }
1620         }
1621
1622         em_enable_intr(adapter);
1623         return;
1624 }
1625
1626
1627 /*********************************************************************
1628  *
1629  *  MSIX Interrupt Service Routines
1630  *
1631  **********************************************************************/
1632 static void
1633 em_msix_tx(void *arg)
1634 {
1635         struct tx_ring *txr = arg;
1636         struct adapter *adapter = txr->adapter;
1637         if_t ifp = adapter->ifp;
1638
1639         ++txr->tx_irq;
1640         EM_TX_LOCK(txr);
1641         em_txeof(txr);
1642 #ifdef EM_MULTIQUEUE
1643         if (!drbr_empty(ifp, txr->br))
1644                 em_mq_start_locked(ifp, txr);
1645 #else
1646         if (!if_sendq_empty(ifp))
1647                 em_start_locked(ifp, txr);
1648 #endif
1649
1650         /* Reenable this interrupt */
1651         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1652         EM_TX_UNLOCK(txr);
1653         return;
1654 }
1655
1656 /*********************************************************************
1657  *
1658  *  MSIX RX Interrupt Service routine
1659  *
1660  **********************************************************************/
1661
1662 static void
1663 em_msix_rx(void *arg)
1664 {
1665         struct rx_ring  *rxr = arg;
1666         struct adapter  *adapter = rxr->adapter;
1667         bool            more;
1668
1669         ++rxr->rx_irq;
1670         if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1671                 return;
1672         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1673         if (more)
1674                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1675         else {
1676                 /* Reenable this interrupt */
1677                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1678         }
1679         return;
1680 }
1681
1682 /*********************************************************************
1683  *
1684  *  MSIX Link Fast Interrupt Service routine
1685  *
1686  **********************************************************************/
1687 static void
1688 em_msix_link(void *arg)
1689 {
1690         struct adapter  *adapter = arg;
1691         u32             reg_icr;
1692
1693         ++adapter->link_irq;
1694         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1695
1696         if (reg_icr & E1000_ICR_RXO)
1697                 adapter->rx_overruns++;
1698
1699         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1700                 adapter->hw.mac.get_link_status = 1;
1701                 em_handle_link(adapter, 0);
1702         } else
1703                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1704                     EM_MSIX_LINK | E1000_IMS_LSC);
1705         /*
1706         ** Because we must read the ICR for this interrupt,
1707         ** the read may clear other causes via autoclear; for
1708         ** this reason we simply create a soft interrupt
1709         ** for all of these vectors.
1710         */
1711         if (reg_icr) {
1712                 E1000_WRITE_REG(&adapter->hw,
1713                         E1000_ICS, adapter->ims);
1714         }
1715         return;
1716 }
1717
1718 static void
1719 em_handle_rx(void *context, int pending)
1720 {
1721         struct rx_ring  *rxr = context;
1722         struct adapter  *adapter = rxr->adapter;
1723         bool            more;
1724
1725         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1726         if (more)
1727                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1728         else {
1729                 /* Reenable this interrupt */
1730                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1731         }
1732 }
1733
1734 static void
1735 em_handle_tx(void *context, int pending)
1736 {
1737         struct tx_ring  *txr = context;
1738         struct adapter  *adapter = txr->adapter;
1739         if_t ifp = adapter->ifp;
1740
1741         EM_TX_LOCK(txr);
1742         em_txeof(txr);
1743 #ifdef EM_MULTIQUEUE
1744         if (!drbr_empty(ifp, txr->br))
1745                 em_mq_start_locked(ifp, txr);
1746 #else
1747         if (!if_sendq_empty(ifp))
1748                 em_start_locked(ifp, txr);
1749 #endif
1750         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1751         EM_TX_UNLOCK(txr);
1752 }
1753
1754 static void
1755 em_handle_link(void *context, int pending)
1756 {
1757         struct adapter  *adapter = context;
1758         struct tx_ring  *txr = adapter->tx_rings;
1759         if_t ifp = adapter->ifp;
1760
1761         if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1762                 return;
1763
1764         EM_CORE_LOCK(adapter);
1765         callout_stop(&adapter->timer);
1766         em_update_link_status(adapter);
1767         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1768         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1769             EM_MSIX_LINK | E1000_IMS_LSC);
1770         if (adapter->link_active) {
1771                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1772                         EM_TX_LOCK(txr);
1773 #ifdef EM_MULTIQUEUE
1774                         if (!drbr_empty(ifp, txr->br))
1775                                 em_mq_start_locked(ifp, txr);
1776 #else
1777                         if (!if_sendq_empty(ifp))
1778                                 em_start_locked(ifp, txr);
1779 #endif
1780                         EM_TX_UNLOCK(txr);
1781                 }
1782         }
1783         EM_CORE_UNLOCK(adapter);
1784 }
1785
1786
1787 /*********************************************************************
1788  *
1789  *  Media Ioctl callback
1790  *
1791  *  This routine is called whenever the user queries the status of
1792  *  the interface using ifconfig.
1793  *
1794  **********************************************************************/
1795 static void
1796 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1797 {
1798         struct adapter *adapter = if_getsoftc(ifp);
1799         u_char fiber_type = IFM_1000_SX;
1800
1801         INIT_DEBUGOUT("em_media_status: begin");
1802
1803         EM_CORE_LOCK(adapter);
1804         em_update_link_status(adapter);
1805
1806         ifmr->ifm_status = IFM_AVALID;
1807         ifmr->ifm_active = IFM_ETHER;
1808
1809         if (!adapter->link_active) {
1810                 EM_CORE_UNLOCK(adapter);
1811                 return;
1812         }
1813
1814         ifmr->ifm_status |= IFM_ACTIVE;
1815
1816         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1817             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1818                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1819         } else {
1820                 switch (adapter->link_speed) {
1821                 case 10:
1822                         ifmr->ifm_active |= IFM_10_T;
1823                         break;
1824                 case 100:
1825                         ifmr->ifm_active |= IFM_100_TX;
1826                         break;
1827                 case 1000:
1828                         ifmr->ifm_active |= IFM_1000_T;
1829                         break;
1830                 }
1831                 if (adapter->link_duplex == FULL_DUPLEX)
1832                         ifmr->ifm_active |= IFM_FDX;
1833                 else
1834                         ifmr->ifm_active |= IFM_HDX;
1835         }
1836         EM_CORE_UNLOCK(adapter);
1837 }
1838
1839 /*********************************************************************
1840  *
1841  *  Media Ioctl callback
1842  *
1843  *  This routine is called when the user changes speed/duplex using
1844  *  the media/mediaopt options with ifconfig.
1845  *
1846  **********************************************************************/
1847 static int
1848 em_media_change(if_t ifp)
1849 {
1850         struct adapter *adapter = if_getsoftc(ifp);
1851         struct ifmedia  *ifm = &adapter->media;
1852
1853         INIT_DEBUGOUT("em_media_change: begin");
1854
1855         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1856                 return (EINVAL);
1857
1858         EM_CORE_LOCK(adapter);
1859         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1860         case IFM_AUTO:
1861                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1862                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1863                 break;
1864         case IFM_1000_LX:
1865         case IFM_1000_SX:
1866         case IFM_1000_T:
1867                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1868                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1869                 break;
1870         case IFM_100_TX:
1871                 adapter->hw.mac.autoneg = FALSE;
1872                 adapter->hw.phy.autoneg_advertised = 0;
1873                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1874                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1875                 else
1876                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1877                 break;
1878         case IFM_10_T:
1879                 adapter->hw.mac.autoneg = FALSE;
1880                 adapter->hw.phy.autoneg_advertised = 0;
1881                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1882                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1883                 else
1884                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1885                 break;
1886         default:
1887                 device_printf(adapter->dev, "Unsupported media type\n");
1888         }
1889
1890         em_init_locked(adapter);
1891         EM_CORE_UNLOCK(adapter);
1892
1893         return (0);
1894 }
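/*
 * For example, "ifconfig em0 media 100baseTX mediaopt full-duplex"
 * reaches em_media_change() with IFM_100_TX and IFM_FDX set, forcing
 * 100 Mbps full duplex, while "ifconfig em0 media autoselect" restores
 * autonegotiation via the IFM_AUTO case above.
 */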
1895
1896 /*********************************************************************
1897  *
1898  *  This routine maps the mbufs to tx descriptors.
1899  *
1900  *  return 0 on success, positive on failure
1901  **********************************************************************/
1902
1903 static int
1904 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1905 {
1906         struct adapter          *adapter = txr->adapter;
1907         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1908         bus_dmamap_t            map;
1909         struct em_txbuffer      *tx_buffer, *tx_buffer_mapped;
1910         struct e1000_tx_desc    *ctxd = NULL;
1911         struct mbuf             *m_head;
1912         struct ether_header     *eh;
1913         struct ip               *ip = NULL;
1914         struct tcphdr           *tp = NULL;
1915         u32                     txd_upper = 0, txd_lower = 0;
1916         int                     ip_off, poff;
1917         int                     nsegs, i, j, first, last = 0;
1918         int                     error;
1919         bool                    do_tso, tso_desc, remap = TRUE;
1920
1921         m_head = *m_headp;
1922         do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1923         tso_desc = FALSE;
1924         ip_off = poff = 0;
1925
1926         /*
1927          * Intel recommends that the entire IP/TCP header reside in a
1928          * single buffer. If multiple descriptors describe the IP and
1929          * TCP header, each descriptor should describe one or more
1930          * complete headers; descriptors referencing only parts of
1931          * headers are not supported. If all layer headers are not
1932          * coalesced into a single buffer, each buffer should not cross
1933          * a 4KB boundary, or be larger than the maximum read request
1934          * size. The controller also requires the IP/TCP header to be
1935          * modified for TSO, so we first get a writable mbuf chain and
1936          * coalesce the ethernet/IP/TCP headers into one buffer, which
1937          * also simplifies IP/TCP/UDP checksum offloading since it has
1938          * similar restrictions.
1939          */
1940         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1941                 if (do_tso || (m_head->m_next != NULL && 
1942                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1943                         if (M_WRITABLE(*m_headp) == 0) {
1944                                 m_head = m_dup(*m_headp, M_NOWAIT);
1945                                 m_freem(*m_headp);
1946                                 if (m_head == NULL) {
1947                                         *m_headp = NULL;
1948                                         return (ENOBUFS);
1949                                 }
1950                                 *m_headp = m_head;
1951                         }
1952                 }
1953                 /*
1954                  * XXX
1955                  * Assume IPv4, we don't have TSO/checksum offload support
1956                  * for IPv6 yet.
1957                  */
1958                 ip_off = sizeof(struct ether_header);
1959                 if (m_head->m_len < ip_off) {
1960                         m_head = m_pullup(m_head, ip_off);
1961                         if (m_head == NULL) {
1962                                 *m_headp = NULL;
1963                                 return (ENOBUFS);
1964                         }
1965                 }
1966                 eh = mtod(m_head, struct ether_header *);
1967                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1968                         ip_off = sizeof(struct ether_vlan_header);
1969                         if (m_head->m_len < ip_off) {
1970                                 m_head = m_pullup(m_head, ip_off);
1971                                 if (m_head == NULL) {
1972                                         *m_headp = NULL;
1973                                         return (ENOBUFS);
1974                                 }
1975                         }
1976                 }
1977                 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1978                         m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1979                         if (m_head == NULL) {
1980                                 *m_headp = NULL;
1981                                 return (ENOBUFS);
1982                         }
1983                 }
1984                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1985                 poff = ip_off + (ip->ip_hl << 2);
1986
1987                 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1988                         if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1989                                 m_head = m_pullup(m_head, poff +
1990                                     sizeof(struct tcphdr));
1991                                 if (m_head == NULL) {
1992                                         *m_headp = NULL;
1993                                         return (ENOBUFS);
1994                                 }
1995                         }
1996                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1997                         /*
1998                          * TSO workaround: pull TSO_WORKAROUND (4)
1999                          *   extra payload bytes into the header mbuf.
2000                          */
2001                         if (m_head->m_len < poff + (tp->th_off << 2)) {
2002                                 m_head = m_pullup(m_head, poff +
2003                                                  (tp->th_off << 2) +
2004                                                  TSO_WORKAROUND);
2005                                 if (m_head == NULL) {
2006                                         *m_headp = NULL;
2007                                         return (ENOBUFS);
2008                                 }
2009                         }
2010                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2011                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2012                         if (do_tso) {
2013                                 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2014                                                   (ip->ip_hl << 2) +
2015                                                   (tp->th_off << 2));
2016                                 ip->ip_sum = 0;
2017                                 /*
2018                                  * The TCP pseudo-header checksum must not
2019                                  * include the TCP payload length, so the
2020                                  * driver recomputes it here as the hardware
2021                                  * expects to see it, per Microsoft's Large
2022                                  * Send specification.
2023                                  */
2024                                 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2025                                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2026                         }
2027                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2028                         if (m_head->m_len < poff + sizeof(struct udphdr)) {
2029                                 m_head = m_pullup(m_head, poff +
2030                                     sizeof(struct udphdr));
2031                                 if (m_head == NULL) {
2032                                         *m_headp = NULL;
2033                                         return (ENOBUFS);
2034                                 }
2035                         }
2036                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2037                 }
2038                 *m_headp = m_head;
2039         }
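        /*
         * For a typical untagged IPv4/TCP frame with no IP or TCP
         * options, the pullups above leave ip_off = 14 (the ethernet
         * header) and poff = 14 + 20 = 34, so the first 54 bytes of
         * the packet reside in the leading mbuf.
         */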
2040
2041         /*
2042          * Map the packet for DMA.
2043          *
2044          * Capture the first descriptor index;
2045          * this descriptor will store the index
2046          * of the EOP, which is the only one that
2047          * now gets a DONE bit writeback.
2048          */
2049         first = txr->next_avail_desc;
2050         tx_buffer = &txr->tx_buffers[first];
2051         tx_buffer_mapped = tx_buffer;
2052         map = tx_buffer->map;
2053
2054 retry:
2055         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2056             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2057
2058         /*
2059          * There are two types of errors we can (try) to handle:
2060          * - EFBIG means the mbuf chain was too long and bus_dma ran
2061          *   out of segments.  Defragment the mbuf chain and try again.
2062          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2063          *   at this point in time.  Defer sending and try again later.
2064          * All other errors, in particular EINVAL, are fatal and prevent the
2065          * mbuf chain from ever going through.  Drop it and report error.
2066          */
2067         if (error == EFBIG && remap) {
2068                 struct mbuf *m;
2069
2070                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2071                 if (m == NULL) {
2072                         adapter->mbuf_defrag_failed++;
2073                         m_freem(*m_headp);
2074                         *m_headp = NULL;
2075                         return (ENOBUFS);
2076                 }
2077                 *m_headp = m;
2078
2079                 /* Try it again, but only once */
2080                 remap = FALSE;
2081                 goto retry;
2082         } else if (error != 0) {
2083                 adapter->no_tx_dma_setup++;
2084                 m_freem(*m_headp);
2085                 *m_headp = NULL;
2086                 return (error);
2087         }
2088
2089         /*
2090          * TSO hardware workaround: if this packet is not
2091          * TSO, is only a single descriptor long, and
2092          * follows a TSO burst, then we need to add a
2093          * sentinel descriptor to prevent premature writeback.
2094          */
2095         if ((!do_tso) && (txr->tx_tso == TRUE)) {
2096                 if (nsegs == 1)
2097                         tso_desc = TRUE;
2098                 txr->tx_tso = FALSE;
2099         }
2100
2101         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2102                 txr->no_desc_avail++;
2103                 bus_dmamap_unload(txr->txtag, map);
2104                 return (ENOBUFS);
2105         }
2106         m_head = *m_headp;
2107
2108         /* Do hardware assists */
2109         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2110                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2111                     &txd_upper, &txd_lower);
2112                 /* we need to make a final sentinel transmit desc */
2113                 tso_desc = TRUE;
2114         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2115                 em_transmit_checksum_setup(txr, m_head,
2116                     ip_off, ip, &txd_upper, &txd_lower);
2117
2118         if (m_head->m_flags & M_VLANTAG) {
2119                 /* Set the vlan id. */
2120                 txd_upper |= htole16(if_getvtag(m_head)) << 16;
2121                 /* Tell hardware to add tag */
2122                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2123         }
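        /*
        ** The tag lands in bits 31:16 of txd_upper (the legacy
        ** descriptor's "special" field), and E1000_TXD_CMD_VLE tells
        ** the MAC to insert it on the wire.
        */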
2124
2125         i = txr->next_avail_desc;
2126
2127         /* Set up our transmit descriptors */
2128         for (j = 0; j < nsegs; j++) {
2129                 bus_size_t seg_len;
2130                 bus_addr_t seg_addr;
2131
2132                 tx_buffer = &txr->tx_buffers[i];
2133                 ctxd = &txr->tx_base[i];
2134                 seg_addr = segs[j].ds_addr;
2135                 seg_len  = segs[j].ds_len;
2136                 /*
2137                 ** TSO Workaround:
2138                 ** If this is the last descriptor, we want to
2139                 ** split it so we have a small final sentinel
2140                 */
2141                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2142                         seg_len -= TSO_WORKAROUND;
2143                         ctxd->buffer_addr = htole64(seg_addr);
2144                         ctxd->lower.data = htole32(
2145                                 adapter->txd_cmd | txd_lower | seg_len);
2146                         ctxd->upper.data = htole32(txd_upper);
2147                         if (++i == adapter->num_tx_desc)
2148                                 i = 0;
2149
2150                         /* Now make the sentinel */     
2151                         txr->tx_avail--;
2152                         ctxd = &txr->tx_base[i];
2153                         tx_buffer = &txr->tx_buffers[i];
2154                         ctxd->buffer_addr =
2155                             htole64(seg_addr + seg_len);
2156                         ctxd->lower.data = htole32(
2157                             adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2158                         ctxd->upper.data =
2159                             htole32(txd_upper);
2160                         last = i;
2161                         if (++i == adapter->num_tx_desc)
2162                                 i = 0;
2163                 } else {
2164                         ctxd->buffer_addr = htole64(seg_addr);
2165                         ctxd->lower.data = htole32(
2166                             adapter->txd_cmd | txd_lower | seg_len);
2167                         ctxd->upper.data = htole32(txd_upper);
2168                         last = i;
2169                         if (++i == adapter->num_tx_desc)
2170                                 i = 0;
2171                 }
2172                 tx_buffer->m_head = NULL;
2173                 tx_buffer->next_eop = -1;
2174         }
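        /*
        ** Sentinel example, assuming TSO_WORKAROUND is 4: a lone
        ** single-segment packet following a TSO burst has its final
        ** 1514-byte segment written as a 1510-byte descriptor plus a
        ** 4-byte sentinel, preventing the premature writeback noted
        ** above.
        */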
2175
2176         txr->next_avail_desc = i;
2177         txr->tx_avail -= nsegs;
2178
2179         tx_buffer->m_head = m_head;
2180         /*
2181         ** Here we swap the map so the last descriptor,
2182         ** which gets the completion interrupt, has the
2183         ** real map, and the first descriptor gets the
2184         ** unused map from this descriptor.
2185         */
2186         tx_buffer_mapped->map = tx_buffer->map;
2187         tx_buffer->map = map;
2188         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2189
2190         /*
2191          * Last Descriptor of Packet
2192          * needs End Of Packet (EOP)
2193          * and Report Status (RS)
2194          */
2195         ctxd->lower.data |=
2196             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2197         /*
2198          * Keep track in the first buffer which
2199          * descriptor will be written back
2200          */
2201         tx_buffer = &txr->tx_buffers[first];
2202         tx_buffer->next_eop = last;
2203
2204         /*
2205          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2206          * that this frame is available to transmit.
2207          */
2208         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2209             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2210         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2211
2212         return (0);
2213 }
2214
2215 static void
2216 em_set_promisc(struct adapter *adapter)
2217 {
2218         if_t ifp = adapter->ifp;
2219         u32             reg_rctl;
2220
2221         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2222
2223         if (if_getflags(ifp) & IFF_PROMISC) {
2224                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2225                 /* Turn this on if you want to see bad packets */
2226                 if (em_debug_sbp)
2227                         reg_rctl |= E1000_RCTL_SBP;
2228                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2229         } else if (if_getflags(ifp) & IFF_ALLMULTI) {
2230                 reg_rctl |= E1000_RCTL_MPE;
2231                 reg_rctl &= ~E1000_RCTL_UPE;
2232                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2233         }
2234 }
2235
2236 static void
2237 em_disable_promisc(struct adapter *adapter)
2238 {
2239         if_t            ifp = adapter->ifp;
2240         u32             reg_rctl;
2241         int             mcnt = 0;
2242
2243         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2244         reg_rctl &=  (~E1000_RCTL_UPE);
2245         if (if_getflags(ifp) & IFF_ALLMULTI)
2246                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2247         else
2248                 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2249         /* Don't disable if in MAX groups */
2250         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2251                 reg_rctl &=  (~E1000_RCTL_MPE);
2252         reg_rctl &=  (~E1000_RCTL_SBP);
2253         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2254 }
2255
2256
2257 /*********************************************************************
2258  *  Multicast Update
2259  *
2260  *  This routine is called whenever multicast address list is updated.
2261  *
2262  **********************************************************************/
2263
2264 static void
2265 em_set_multi(struct adapter *adapter)
2266 {
2267         if_t ifp = adapter->ifp;
2268         u32 reg_rctl = 0;
2269         u8  *mta; /* Multicast array memory */
2270         int mcnt = 0;
2271
2272         IOCTL_DEBUGOUT("em_set_multi: begin");
2273
2274         mta = adapter->mta;
2275         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2276
2277         if (adapter->hw.mac.type == e1000_82542 && 
2278             adapter->hw.revision_id == E1000_REVISION_2) {
2279                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2280                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2281                         e1000_pci_clear_mwi(&adapter->hw);
2282                 reg_rctl |= E1000_RCTL_RST;
2283                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2284                 msec_delay(5);
2285         }
2286
2287         if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2288
2289         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2290                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2291                 reg_rctl |= E1000_RCTL_MPE;
2292                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2293         } else
2294                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2295
2296         if (adapter->hw.mac.type == e1000_82542 && 
2297             adapter->hw.revision_id == E1000_REVISION_2) {
2298                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2299                 reg_rctl &= ~E1000_RCTL_RST;
2300                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2301                 msec_delay(5);
2302                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2303                         e1000_pci_set_mwi(&adapter->hw);
2304         }
2305 }
2306
2307
2308 /*********************************************************************
2309  *  Timer routine
2310  *
2311  *  This routine checks for link status and updates statistics.
2312  *
2313  **********************************************************************/
2314
2315 static void
2316 em_local_timer(void *arg)
2317 {
2318         struct adapter  *adapter = arg;
2319         if_t ifp = adapter->ifp;
2320         struct tx_ring  *txr = adapter->tx_rings;
2321         struct rx_ring  *rxr = adapter->rx_rings;
2322         u32             trigger = 0;
2323
2324         EM_CORE_LOCK_ASSERT(adapter);
2325
2326         em_update_link_status(adapter);
2327         em_update_stats_counters(adapter);
2328
2329         /* Reset LAA into RAR[0] on 82571 */
2330         if ((adapter->hw.mac.type == e1000_82571) &&
2331             e1000_get_laa_state_82571(&adapter->hw))
2332                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2333
2334         /* Mask to use in the irq trigger */
2335         if (adapter->msix_mem) {
2336                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2337                         trigger |= rxr->ims;
2338                 rxr = adapter->rx_rings;
2339         } else
2340                 trigger = E1000_ICS_RXDMT0;
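        /*
        ** With MSIX and two RX queues, for example, the loop above
        ** yields trigger = (1 << 20) | (1 << 21), matching the
        ** per-ring ims bits assigned in em_allocate_msix().
        */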
2341
2342         /*
2343         ** Check on the state of the TX queue(s); this
2344         ** can be done without the lock because it's RO
2345         ** and the HUNG state will be static if set.
2346         */
2347         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2348                 if (txr->busy == EM_TX_HUNG)
2349                         goto hung;
2350                 if (txr->busy >= EM_TX_MAXTRIES)
2351                         txr->busy = EM_TX_HUNG;
2352                 /* Schedule a TX tasklet if needed */
2353                 if (txr->tx_avail <= EM_MAX_SCATTER)
2354                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2355         }
2356         
2357         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2358 #ifndef DEVICE_POLLING
2359         /* Trigger an RX interrupt to guarantee mbuf refresh */
2360         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2361 #endif
2362         return;
2363 hung:
2364         /* Looks like we're hung */
2365         device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2366                         txr->me);
2367         em_print_debug_info(adapter);
2368         if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2369         adapter->watchdog_events++;
2370         em_init_locked(adapter);
2371 }
2372
2373
2374 static void
2375 em_update_link_status(struct adapter *adapter)
2376 {
2377         struct e1000_hw *hw = &adapter->hw;
2378         if_t ifp = adapter->ifp;
2379         device_t dev = adapter->dev;
2380         struct tx_ring *txr = adapter->tx_rings;
2381         u32 link_check = 0;
2382
2383         /* Get the cached link value or read phy for real */
2384         switch (hw->phy.media_type) {
2385         case e1000_media_type_copper:
2386                 if (hw->mac.get_link_status) {
2387                         if (hw->mac.type == e1000_pch_spt)
2388                                 msec_delay(50);
2389                         /* Do the work to read phy */
2390                         e1000_check_for_link(hw);
2391                         link_check = !hw->mac.get_link_status;
2392                         if (link_check) /* ESB2 fix */
2393                                 e1000_cfg_on_link_up(hw);
2394                 } else
2395                         link_check = TRUE;
2396                 break;
2397         case e1000_media_type_fiber:
2398                 e1000_check_for_link(hw);
2399                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2400                                  E1000_STATUS_LU);
2401                 break;
2402         case e1000_media_type_internal_serdes:
2403                 e1000_check_for_link(hw);
2404                 link_check = adapter->hw.mac.serdes_has_link;
2405                 break;
2406         default:
2407         case e1000_media_type_unknown:
2408                 break;
2409         }
2410
2411         /* Now check for a transition */
2412         if (link_check && (adapter->link_active == 0)) {
2413                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2414                     &adapter->link_duplex);
2415                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2416                 if ((adapter->link_speed != SPEED_1000) &&
2417                     ((hw->mac.type == e1000_82571) ||
2418                     (hw->mac.type == e1000_82572))) {
2419                         int tarc0;
2420                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2421                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2422                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2423                 }
2424                 if (bootverbose)
2425                         device_printf(dev, "Link is up %d Mbps %s\n",
2426                             adapter->link_speed,
2427                             ((adapter->link_duplex == FULL_DUPLEX) ?
2428                             "Full Duplex" : "Half Duplex"));
2429                 adapter->link_active = 1;
2430                 adapter->smartspeed = 0;
2431                 if_setbaudrate(ifp, adapter->link_speed * 1000000);
2432                 if_link_state_change(ifp, LINK_STATE_UP);
2433         } else if (!link_check && (adapter->link_active == 1)) {
2434                 if_setbaudrate(ifp, 0);
2435                 adapter->link_speed = 0;
2436                 adapter->link_duplex = 0;
2437                 if (bootverbose)
2438                         device_printf(dev, "Link is Down\n");
2439                 adapter->link_active = 0;
2440                 /* Link down, disable hang detection */
2441                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2442                         txr->busy = EM_TX_IDLE;
2443                 if_link_state_change(ifp, LINK_STATE_DOWN);
2444         }
2445 }
2446
2447 /*********************************************************************
2448  *
2449  *  This routine disables all traffic on the adapter by issuing a
2450  *  global reset on the MAC and deallocates TX/RX buffers.
2451  *
2452  *  This routine should always be called with BOTH the CORE
2453  *  and TX locks.
2454  **********************************************************************/
2455
2456 static void
2457 em_stop(void *arg)
2458 {
2459         struct adapter  *adapter = arg;
2460         if_t ifp = adapter->ifp;
2461         struct tx_ring  *txr = adapter->tx_rings;
2462
2463         EM_CORE_LOCK_ASSERT(adapter);
2464
2465         INIT_DEBUGOUT("em_stop: begin");
2466
2467         em_disable_intr(adapter);
2468         callout_stop(&adapter->timer);
2469
2470         /* Tell the stack that the interface is no longer active */
2471         if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2472
2473         /* Disarm Hang Detection. */
2474         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2475                 EM_TX_LOCK(txr);
2476                 txr->busy = EM_TX_IDLE;
2477                 EM_TX_UNLOCK(txr);
2478         }
2479
2480         /* I219 needs some special flushing to avoid hangs */
2481         if (adapter->hw.mac.type == e1000_pch_spt)
2482                 em_flush_desc_rings(adapter);
2483
2484         e1000_reset_hw(&adapter->hw);
2485         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2486
2487         e1000_led_off(&adapter->hw);
2488         e1000_cleanup_led(&adapter->hw);
2489 }
2490
2491
2492 /*********************************************************************
2493  *
2494  *  Determine hardware revision.
2495  *
2496  **********************************************************************/
2497 static void
2498 em_identify_hardware(struct adapter *adapter)
2499 {
2500         device_t dev = adapter->dev;
2501
2502         /* Make sure our PCI config space has the necessary stuff set */
2503         pci_enable_busmaster(dev);
2504         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2505
2506         /* Save off the information about this board */
2507         adapter->hw.vendor_id = pci_get_vendor(dev);
2508         adapter->hw.device_id = pci_get_device(dev);
2509         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2510         adapter->hw.subsystem_vendor_id =
2511             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2512         adapter->hw.subsystem_device_id =
2513             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2514
2515         /* Do Shared Code Init and Setup */
2516         if (e1000_set_mac_type(&adapter->hw)) {
2517                 device_printf(dev, "Setup init failure\n");
2518                 return;
2519         }
2520 }
2521
2522 static int
2523 em_allocate_pci_resources(struct adapter *adapter)
2524 {
2525         device_t        dev = adapter->dev;
2526         int             rid;
2527
2528         rid = PCIR_BAR(0);
2529         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2530             &rid, RF_ACTIVE);
2531         if (adapter->memory == NULL) {
2532                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2533                 return (ENXIO);
2534         }
2535         adapter->osdep.mem_bus_space_tag =
2536             rman_get_bustag(adapter->memory);
2537         adapter->osdep.mem_bus_space_handle =
2538             rman_get_bushandle(adapter->memory);
2539         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2540
2541         adapter->hw.back = &adapter->osdep;
2542
2543         return (0);
2544 }
2545
2546 /*********************************************************************
2547  *
2548  *  Setup the Legacy or MSI Interrupt handler
2549  *
2550  **********************************************************************/
2551 int
2552 em_allocate_legacy(struct adapter *adapter)
2553 {
2554         device_t dev = adapter->dev;
2555         struct tx_ring  *txr = adapter->tx_rings;
2556         int error, rid = 0;
2557
2558         /* Manually turn off all interrupts */
2559         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2560
2561         if (adapter->msix == 1) /* using MSI */
2562                 rid = 1;
2563         /* We allocate a single interrupt resource */
2564         adapter->res = bus_alloc_resource_any(dev,
2565             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2566         if (adapter->res == NULL) {
2567                 device_printf(dev, "Unable to allocate bus resource: "
2568                     "interrupt\n");
2569                 return (ENXIO);
2570         }
2571
2572         /*
2573          * Allocate a fast interrupt and the associated
2574          * deferred processing contexts.
2575          */
2576         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2577         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2578             taskqueue_thread_enqueue, &adapter->tq);
2579         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2580             device_get_nameunit(adapter->dev));
2581         /* Use a TX only tasklet for local timer */
2582         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2583         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2584             taskqueue_thread_enqueue, &txr->tq);
2585         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2586             device_get_nameunit(adapter->dev));
2587         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2588         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2589             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2590                 device_printf(dev, "Failed to register fast interrupt "
2591                             "handler: %d\n", error);
2592                 taskqueue_free(adapter->tq);
2593                 adapter->tq = NULL;
2594                 return (error);
2595         }
2596         
2597         return (0);
2598 }
2599
2600 /*********************************************************************
2601  *
2602  *  Setup the MSIX Interrupt handlers
2603  *   This is not really Multiqueue, rather
2604  *   its just separate interrupt vectors
2605  *   for TX, RX, and Link.
2606  *
2607  **********************************************************************/
2608 int
2609 em_allocate_msix(struct adapter *adapter)
2610 {
2611         device_t        dev = adapter->dev;
2612         struct          tx_ring *txr = adapter->tx_rings;
2613         struct          rx_ring *rxr = adapter->rx_rings;
2614         int             error, rid, vector = 0;
2615         int             cpu_id = 0;
2616
2617
2618         /* Make sure all interrupts are disabled */
2619         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2620
2621         /* First set up ring resources */
2622         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2623
2624                 /* RX ring */
2625                 rid = vector + 1;
2626
2627                 rxr->res = bus_alloc_resource_any(dev,
2628                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2629                 if (rxr->res == NULL) {
2630                         device_printf(dev,
2631                             "Unable to allocate bus resource: "
2632                             "RX MSIX Interrupt %d\n", i);
2633                         return (ENXIO);
2634                 }
2635                 if ((error = bus_setup_intr(dev, rxr->res,
2636                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2637                     rxr, &rxr->tag)) != 0) {
2638                         device_printf(dev, "Failed to register RX handler");
2639                         return (error);
2640                 }
2641 #if __FreeBSD_version >= 800504
2642                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2643 #endif
2644                 rxr->msix = vector;
2645
2646                 if (em_last_bind_cpu < 0)
2647                         em_last_bind_cpu = CPU_FIRST();
2648                 cpu_id = em_last_bind_cpu;
2649                 bus_bind_intr(dev, rxr->res, cpu_id);
2650
2651                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2652                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2653                     taskqueue_thread_enqueue, &rxr->tq);
2654                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2655                     device_get_nameunit(adapter->dev), cpu_id);
2656                 /*
2657                 ** Set the bit to enable interrupt
2658                 ** in E1000_IMS -- bits 20 and 21
2659                 ** are for RX0 and RX1; note this has
2660                 ** NOTHING to do with the MSIX vector.
2661                 */
2662                 rxr->ims = 1 << (20 + i);
2663                 adapter->ims |= rxr->ims;
2664                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2665
2666                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2667         }
2668
2669         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2670                 /* TX ring */
2671                 rid = vector + 1;
2672                 txr->res = bus_alloc_resource_any(dev,
2673                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2674                 if (txr->res == NULL) {
2675                         device_printf(dev,
2676                             "Unable to allocate bus resource: "
2677                             "TX MSIX Interrupt %d\n", i);
2678                         return (ENXIO);
2679                 }
2680                 if ((error = bus_setup_intr(dev, txr->res,
2681                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2682                     txr, &txr->tag)) != 0) {
2683                         device_printf(dev, "Failed to register TX handler");
2684                         return (error);
2685                 }
2686 #if __FreeBSD_version >= 800504
2687                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2688 #endif
2689                 txr->msix = vector;
2690
2691                 if (em_last_bind_cpu < 0)
2692                         em_last_bind_cpu = CPU_FIRST();
2693                 cpu_id = em_last_bind_cpu;
2694                 bus_bind_intr(dev, txr->res, cpu_id);
2695
2696                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2697                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2698                     taskqueue_thread_enqueue, &txr->tq);
2699                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2700                     device_get_nameunit(adapter->dev), cpu_id);
2701                 /*
2702                 ** Set the bit to enable interrupt
2703                 ** in E1000_IMS -- bits 22 and 23
2704                 ** are for TX0 and TX1; note this has
2705                 ** NOTHING to do with the MSIX vector.
2706                 */
2707                 txr->ims = 1 << (22 + i);
2708                 adapter->ims |= txr->ims;
2709                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2710
2711                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2712         }
2713
2714         /* Link interrupt */
2715         rid = vector + 1;
2716         adapter->res = bus_alloc_resource_any(dev,
2717             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2718         if (!adapter->res) {
2719                 device_printf(dev,"Unable to allocate "
2720                     "bus resource: Link interrupt [%d]\n", rid);
2721                 return (ENXIO);
2722         }
2723         /* Set the link handler function */
2724         error = bus_setup_intr(dev, adapter->res,
2725             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2726             em_msix_link, adapter, &adapter->tag);
2727         if (error) {
2728                 adapter->res = NULL;
2729                 device_printf(dev, "Failed to register LINK handler\n");
2730                 return (error);
2731         }
2732 #if __FreeBSD_version >= 800504
2733         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2734 #endif
2735         adapter->linkvec = vector;
2736         adapter->ivars |=  (8 | vector) << 16;
2737         adapter->ivars |= 0x80000000;
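        /*
         * Bit 31 of IVAR: per the 82574 interrupt documentation this
         * requests a Tx interrupt on every descriptor write-back, not
         * just on RS-marked descriptors.
         */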
2738
2739         return (0);
2740 }
2741
2742
2743 static void
2744 em_free_pci_resources(struct adapter *adapter)
2745 {
2746         device_t        dev = adapter->dev;
2747         struct tx_ring  *txr;
2748         struct rx_ring  *rxr;
2749         int             rid;
2750
2751
2752         /*
2753         ** Release all the queue interrupt resources:
2754         */
2755         for (int i = 0; i < adapter->num_queues; i++) {
2756                 txr = &adapter->tx_rings[i];
2757                 /* an early abort? */
2758                 if (txr == NULL)
2759                         break;
2760                 rid = txr->msix + 1;
2761                 if (txr->tag != NULL) {
2762                         bus_teardown_intr(dev, txr->res, txr->tag);
2763                         txr->tag = NULL;
2764                 }
2765                 if (txr->res != NULL)
2766                         bus_release_resource(dev, SYS_RES_IRQ,
2767                             rid, txr->res);
2768
2769                 rxr = &adapter->rx_rings[i];
2770                 /* an early abort? */
2771                 if (rxr == NULL)
2772                         break;
2773                 rid = rxr->msix + 1;
2774                 if (rxr->tag != NULL) {
2775                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2776                         rxr->tag = NULL;
2777                 }
2778                 if (rxr->res != NULL)
2779                         bus_release_resource(dev, SYS_RES_IRQ,
2780                             rid, rxr->res);
2781         }
2782
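        /*
         * Choose the rid of the link/other interrupt: with MSIX it
         * follows the queue vectors, with plain MSI it is 1, and a
         * legacy INTx interrupt uses rid 0.
         */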
2783         if (adapter->linkvec) /* we are doing MSIX */
2784                 rid = adapter->linkvec + 1;
2785         else
2786                 rid = (adapter->msix != 0) ? 1 : 0;
2787
2788         if (adapter->tag != NULL) {
2789                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2790                 adapter->tag = NULL;
2791         }
2792
2793         if (adapter->res != NULL)
2794                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2795
2796
2797         if (adapter->msix)
2798                 pci_release_msi(dev);
2799
2800         if (adapter->msix_mem != NULL)
2801                 bus_release_resource(dev, SYS_RES_MEMORY,
2802                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2803
2804         if (adapter->memory != NULL)
2805                 bus_release_resource(dev, SYS_RES_MEMORY,
2806                     PCIR_BAR(0), adapter->memory);
2807
2808         if (adapter->flash != NULL)
2809                 bus_release_resource(dev, SYS_RES_MEMORY,
2810                     EM_FLASH, adapter->flash);
2811 }
2812
2813 /*
2814  * Setup MSI or MSI/X
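 * Returns the number of vectors allocated, or 0 to fall back to a
 * legacy (INTx) interrupt.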
2815  */
2816 static int
2817 em_setup_msix(struct adapter *adapter)
2818 {
2819         device_t dev = adapter->dev;
2820         int val;
2821
2822         /* Nearly always going to use one queue */
2823         adapter->num_queues = 1;
2824
2825         /*
2826         ** Try using MSI-X for Hartwell adapters
2827         */
2828         if ((adapter->hw.mac.type == e1000_82574) &&
2829             (em_enable_msix == TRUE)) {
2830 #ifdef EM_MULTIQUEUE
2831                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2832                 if (adapter->num_queues > 1)
2833                         em_enable_vectors_82574(adapter);
2834 #endif
2835                 /* Map the MSIX BAR */
2836                 int rid = PCIR_BAR(EM_MSIX_BAR);
2837                 adapter->msix_mem = bus_alloc_resource_any(dev,
2838                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2839                 if (adapter->msix_mem == NULL) {
2840                         /* May not be enabled */
2841                         device_printf(adapter->dev,
2842                             "Unable to map MSIX table\n");
2843                         goto msi;
2844                 }
2845                 val = pci_msix_count(dev); 
2846
2847 #ifdef EM_MULTIQUEUE
2848                 /* We need 5 vectors in the multiqueue case */
2849                 if (adapter->num_queues > 1) {
2850                         if (val >= 5)
2851                                 val = 5;
2852                         else {
2853                                 adapter->num_queues = 1;
2854                                 device_printf(adapter->dev,
2855                                     "Insufficient MSIX vectors for >1 queue, "
2856                                     "using single queue...\n");
2857                                 goto msix_one;
2858                         }
2859                 } else {
2860 msix_one:
2861 #endif
2862                         if (val >= 3)
2863                                 val = 3;
2864                         else {
2865                                 device_printf(adapter->dev,
2866                                     "Insufficient MSIX vectors, using MSI\n");
2867                                 goto msi;
2868                         }
2869 #ifdef EM_MULTIQUEUE
2870                 }
2871 #endif
2872
2873                 if (pci_alloc_msix(dev, &val) == 0) {
2874                         device_printf(adapter->dev,
2875                             "Using MSIX interrupts "
2876                             "with %d vectors\n", val);
2877                         return (val);
2878                 }
2879
2880                 /*
2881                 ** If MSIX alloc failed or provided us with
2882                 ** less than needed, free and fall through to MSI
2883                 */
2884                 pci_release_msi(dev);
2885         }
2886 msi:
2887         if (adapter->msix_mem != NULL) {
2888                 bus_release_resource(dev, SYS_RES_MEMORY,
2889                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2890                 adapter->msix_mem = NULL;
2891         }
2892         val = 1;
2893         if (pci_alloc_msi(dev, &val) == 0) {
2894                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2895                 return (val);
2896         } 
2897         /* Should only happen due to manual configuration */
2898         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2899         return (0);
2900 }
2901
2902
2903 /*
2904 ** The following three flush routines are used as a workaround for the
2905 ** I219 client parts, and only for them.
2906 **
2907 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2908 **
2909 ** We want to clear all pending descriptors from the TX ring; the HW
2910 ** consumes them once the tail register is bumped. We assign the ring's
2911 ** own DMA address as the dummy buffer address of the descriptor; the
2912 ** data does not matter since we are about to reset the HW.
2913 */
2914 static void
2915 em_flush_tx_ring(struct adapter *adapter)
2916 {
2917         struct e1000_hw         *hw = &adapter->hw;
2918         struct tx_ring          *txr = adapter->tx_rings;
2919         struct e1000_tx_desc    *txd;
2920         u32                     tctl, txd_lower = E1000_TXD_CMD_IFCS;
2921         u16                     size = 512;
2922
2923         tctl = E1000_READ_REG(hw, E1000_TCTL);
2924         E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2925
2926         txd = &txr->tx_base[txr->next_avail_desc++];
2927         if (txr->next_avail_desc == adapter->num_tx_desc)
2928                 txr->next_avail_desc = 0;
2929
2930         /* Just use the ring as a dummy buffer addr */
2931         txd->buffer_addr = txr->txdma.dma_paddr;
2932         txd->lower.data = htole32(txd_lower | size);
2933         txd->upper.data = 0;
2934
2935         /* flush descriptors to memory before notifying the HW */
2936         wmb();
2937
2938         E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2939         mb();
2940         usec_delay(250);
2941 }
2942
2943 /*
2944 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2945 **
2946 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2947 */
2948 static void
2949 em_flush_rx_ring(struct adapter *adapter)
2950 {
2951         struct e1000_hw *hw = &adapter->hw;
2952         u32             rctl, rxdctl;
2953
2954         rctl = E1000_READ_REG(hw, E1000_RCTL);
2955         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2956         E1000_WRITE_FLUSH(hw);
2957         usec_delay(150);
2958
2959         rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2960         /* zero the lower 14 bits (prefetch and host thresholds) */
2961         rxdctl &= 0xffffc000;
2962         /*
2963          * update thresholds: prefetch threshold to 31, host threshold to 1
2964          * and make sure the granularity is "descriptors" and not "cache lines"
2965          */
2966         rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2967         E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2968
2969         /* momentarily enable the RX ring for the changes to take effect */
2970         E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2971         E1000_WRITE_FLUSH(hw);
2972         usec_delay(150);
2973         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2974 }
2975
2976 /*
2977 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
2978 **
2979 ** On I219, the descriptor rings must be emptied before resetting the HW
2980 ** or before changing the device state to D3 during runtime (runtime PM).
2981 **
2982 ** Failure to do this will cause the HW to enter a unit hang state which can
2983 ** only be released by a PCI reset of the device.
2984 **
2985 */
2986 static void
2987 em_flush_desc_rings(struct adapter *adapter)
2988 {
2989         struct e1000_hw *hw = &adapter->hw;
2990         device_t        dev = adapter->dev;
2991         u16             hang_state;
2992         u32             fext_nvm11, tdlen;
2993  
2994         /* First, disable MULR fix in FEXTNVM11 */
2995         fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2996         fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2997         E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2998         
2999         /* do nothing if we're not in a faulty state, or if the queue is empty */
3000         tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3001         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3002         if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3003                 return;
3004         em_flush_tx_ring(adapter);
3005
3006         /* recheck, maybe the fault is caused by the rx ring */
3007         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3008         if (hang_state & FLUSH_DESC_REQUIRED)
3009                 em_flush_rx_ring(adapter);
3010 }
3011
3012
3013 /*********************************************************************
3014  *
3015  *  Initialize the hardware to a configuration
3016  *  as specified by the adapter structure.
3017  *
3018  **********************************************************************/
3019 static void
3020 em_reset(struct adapter *adapter)
3021 {
3022         device_t        dev = adapter->dev;
3023         if_t ifp = adapter->ifp;
3024         struct e1000_hw *hw = &adapter->hw;
3025         u16             rx_buffer_size;
3026         u32             pba;
3027
3028         INIT_DEBUGOUT("em_reset: begin");
3029
3030         /* Set up smart power down as default off on newer adapters. */
3031         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3032             hw->mac.type == e1000_82572)) {
3033                 u16 phy_tmp = 0;
3034
3035                 /* Speed up time to link by disabling smart power down. */
3036                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3037                 phy_tmp &= ~IGP02E1000_PM_SPD;
3038                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3039         }
3040
3041         /*
3042          * Packet Buffer Allocation (PBA)
3043          * Writing PBA sets the receive portion of the buffer;
3044          * the remainder is used for the transmit buffer.
3045          */
3046         switch (hw->mac.type) {
3047         /* Total Packet Buffer on these is 48K */
3048         case e1000_82571:
3049         case e1000_82572:
3050         case e1000_80003es2lan:
3051                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3052                 break;
3053         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3054                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3055                 break;
3056         case e1000_82574:
3057         case e1000_82583:
3058                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3059                 break;
3060         case e1000_ich8lan:
3061                 pba = E1000_PBA_8K;
3062                 break;
3063         case e1000_ich9lan:
3064         case e1000_ich10lan:
3065                 /* Boost Receive side for jumbo frames */
3066                 if (adapter->hw.mac.max_frame_size > 4096)
3067                         pba = E1000_PBA_14K;
3068                 else
3069                         pba = E1000_PBA_10K;
3070                 break;
3071         case e1000_pchlan:
3072         case e1000_pch2lan:
3073         case e1000_pch_lpt:
3074         case e1000_pch_spt:
3075                 pba = E1000_PBA_26K;
3076                 break;
3077         default:
3078                 if (adapter->hw.mac.max_frame_size > 8192)
3079                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3080                 else
3081                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3082         }
3083         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3084
3085         /*
3086          * These parameters control the automatic generation (Tx) and
3087          * response (Rx) to Ethernet PAUSE frames.
3088          * - High water mark should allow for at least two frames to be
3089          *   received after sending an XOFF.
3090          * - Low water mark works best when it is very near the high water mark.
3091          *   This allows the receiver to restart by sending XON when it has
3092          *   drained a bit. Here we use an arbitrary value of 1500 which will
3093          *   restart after one full frame is pulled from the buffer. There
3094          *   could be several smaller frames in the buffer and if so they will
3095          *   not trigger the XON until their total number reduces the buffer
3096          *   by 1500.
3097          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3098          */
3099         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3100         hw->fc.high_water = rx_buffer_size -
3101             roundup2(adapter->hw.mac.max_frame_size, 1024);
3102         hw->fc.low_water = hw->fc.high_water - 1500;
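        /*
         * Worked example (a sketch, assuming a 32K Rx PBA and a 1522-byte
         * max frame): rx_buffer_size = 32 << 10 = 32768, high_water =
         * 32768 - roundup2(1522, 1024) = 30720, low_water = 29220.
         */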
3103
3104         if (adapter->fc) /* locally set flow control value? */
3105                 hw->fc.requested_mode = adapter->fc;
3106         else
3107                 hw->fc.requested_mode = e1000_fc_full;
3108
3109         if (hw->mac.type == e1000_80003es2lan)
3110                 hw->fc.pause_time = 0xFFFF;
3111         else
3112                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3113
3114         hw->fc.send_xon = TRUE;
3115
3116         /* Device specific overrides/settings */
3117         switch (hw->mac.type) {
3118         case e1000_pchlan:
3119                 /* Workaround: no TX flow ctrl for PCH */
3120                 hw->fc.requested_mode = e1000_fc_rx_pause;
3121                 hw->fc.pause_time = 0xFFFF; /* override */
3122                 if (if_getmtu(ifp) > ETHERMTU) {
3123                         hw->fc.high_water = 0x3500;
3124                         hw->fc.low_water = 0x1500;
3125                 } else {
3126                         hw->fc.high_water = 0x5000;
3127                         hw->fc.low_water = 0x3000;
3128                 }
3129                 hw->fc.refresh_time = 0x1000;
3130                 break;
3131         case e1000_pch2lan:
3132         case e1000_pch_lpt:
3133         case e1000_pch_spt:
3134                 hw->fc.high_water = 0x5C20;
3135                 hw->fc.low_water = 0x5048;
3136                 hw->fc.pause_time = 0x0650;
3137                 hw->fc.refresh_time = 0x0400;
3138                 /* Jumbos need adjusted PBA */
3139                 if (if_getmtu(ifp) > ETHERMTU)
3140                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3141                 else
3142                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3143                 break;
3144         case e1000_ich9lan:
3145         case e1000_ich10lan:
3146                 if (if_getmtu(ifp) > ETHERMTU) {
3147                         hw->fc.high_water = 0x2800;
3148                         hw->fc.low_water = hw->fc.high_water - 8;
3149                         break;
3150                 }
3151                 /* else fall through */
3152         default:
3153                 if (hw->mac.type == e1000_80003es2lan)
3154                         hw->fc.pause_time = 0xFFFF;
3155                 break;
3156         }
3157
3158         /* I219 needs some special flushing to avoid hangs */
3159         if (hw->mac.type == e1000_pch_spt)
3160                 em_flush_desc_rings(adapter);
3161
3162         /* Issue a global reset */
3163         e1000_reset_hw(hw);
3164         E1000_WRITE_REG(hw, E1000_WUC, 0);
3165         em_disable_aspm(adapter);
3166         /* and a re-init */
3167         if (e1000_init_hw(hw) < 0) {
3168                 device_printf(dev, "Hardware Initialization Failed\n");
3169                 return;
3170         }
3171
3172         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3173         e1000_get_phy_info(hw);
3174         e1000_check_for_link(hw);
3175         return;
3176 }
3177
3178 /*********************************************************************
3179  *
3180  *  Setup networking device structure and register an interface.
3181  *
3182  **********************************************************************/
3183 static int
3184 em_setup_interface(device_t dev, struct adapter *adapter)
3185 {
3186         if_t ifp;
3187
3188         INIT_DEBUGOUT("em_setup_interface: begin");
3189
3190         ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3191         if (ifp == NULL) {
3192                 device_printf(dev, "can not allocate ifnet structure\n");
3193                 return (-1);
3194         }
3195         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3196         if_setdev(ifp, dev);
3197         if_setinitfn(ifp, em_init);
3198         if_setsoftc(ifp, adapter);
3199         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3200         if_setioctlfn(ifp, em_ioctl);
3201         if_setgetcounterfn(ifp, em_get_counter);
3202
3203         /* TSO parameters */
3204         ifp->if_hw_tsomax = IP_MAXPACKET;
3205         /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3206         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3207         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3208
3209 #ifdef EM_MULTIQUEUE
3210         /* Multiqueue stack interface */
3211         if_settransmitfn(ifp, em_mq_start);
3212         if_setqflushfn(ifp, em_qflush);
3213 #else
3214         if_setstartfn(ifp, em_start);
3215         if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3216         if_setsendqready(ifp);
3217 #endif  
3218
3219         ether_ifattach(ifp, adapter->hw.mac.addr);
3220
3221         if_setcapabilities(ifp, 0);
3222         if_setcapenable(ifp, 0);
3223
3224
3225         if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3226             IFCAP_TSO4, 0);
3227         /*
3228          * Tell the upper layer(s) we
3229          * support full VLAN capability
3230          */
3231         if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3232         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3233             IFCAP_VLAN_MTU, 0);
3234         if_setcapenable(ifp, if_getcapabilities(ifp));
3235
3236         /*
3237         ** Don't turn this on by default: if VLANs are
3238         ** created on another pseudo device (e.g. lagg),
3239         ** VLAN events are not passed through, breaking
3240         ** operation, but with HW FILTER off it works. If
3241         ** using VLANs directly on the em driver you can
3242         ** enable this and get full hardware tag filtering.
3243         */
3244         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
3245
3246 #ifdef DEVICE_POLLING
3247         if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3248 #endif
3249
3250         /* Enable only WOL MAGIC by default */
3251         if (adapter->wol) {
3252                 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3253                 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3254         }
3255                 
3256         /*
3257          * Specify the media types supported by this adapter and register
3258          * callbacks to update media and link information
3259          */
3260         ifmedia_init(&adapter->media, IFM_IMASK,
3261             em_media_change, em_media_status);
3262         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3263             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3264                 u_char fiber_type = IFM_1000_SX;        /* default type */
3265
3266                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3267                             0, NULL);
3268                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3269         } else {
3270                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3271                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3272                             0, NULL);
3273                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3274                             0, NULL);
3275                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3276                             0, NULL);
3277                 if (adapter->hw.phy.type != e1000_phy_ife) {
3278                         ifmedia_add(&adapter->media,
3279                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3280                         ifmedia_add(&adapter->media,
3281                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3282                 }
3283         }
3284         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3285         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3286         return (0);
3287 }
3288
3289
3290 /*
3291  * Manage DMA'able memory.
3292  */
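/*
 * em_dmamap_cb - bus_dmamap_load() callback; the tags created below use
 * nsegments == 1, so segs[0] describes the entire area and its bus
 * address is handed back through arg.
 */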
3293 static void
3294 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3295 {
3296         if (error)
3297                 return;
3298         *(bus_addr_t *) arg = segs[0].ds_addr;
3299 }
3300
3301 static int
3302 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3303         struct em_dma_alloc *dma, int mapflags)
3304 {
3305         int error;
3306
3307         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3308                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3309                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3310                                 BUS_SPACE_MAXADDR,      /* highaddr */
3311                                 NULL, NULL,             /* filter, filterarg */
3312                                 size,                   /* maxsize */
3313                                 1,                      /* nsegments */
3314                                 size,                   /* maxsegsize */
3315                                 0,                      /* flags */
3316                                 NULL,                   /* lockfunc */
3317                                 NULL,                   /* lockarg */
3318                                 &dma->dma_tag);
3319         if (error) {
3320                 device_printf(adapter->dev,
3321                     "%s: bus_dma_tag_create failed: %d\n",
3322                     __func__, error);
3323                 goto fail_0;
3324         }
3325
3326         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3327             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3328         if (error) {
3329                 device_printf(adapter->dev,
3330                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3331                     __func__, (uintmax_t)size, error);
3332                 goto fail_2;
3333         }
3334
3335         dma->dma_paddr = 0;
3336         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3337             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3338         if (error || dma->dma_paddr == 0) {
3339                 device_printf(adapter->dev,
3340                     "%s: bus_dmamap_load failed: %d\n",
3341                     __func__, error);
3342                 goto fail_3;
3343         }
3344
3345         return (0);
3346
3347 fail_3:
3348         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3349 fail_2:
3350         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3351         bus_dma_tag_destroy(dma->dma_tag);
3352 fail_0:
3353         dma->dma_tag = NULL;
3354
3355         return (error);
3356 }
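/*
 * Typical usage of the pair above (a sketch only, with illustrative
 * names, not code from this driver):
 *
 *      struct em_dma_alloc dma;
 *
 *      if (em_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
 *              base = (struct e1000_tx_desc *)dma.dma_vaddr;
 *              ... program dma.dma_paddr into TDBAL/TDBAH ...
 *              em_dma_free(adapter, &dma);
 *      }
 */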
3357
3358 static void
3359 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3360 {
3361         if (dma->dma_tag == NULL)
3362                 return;
3363         if (dma->dma_paddr != 0) {
3364                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3365                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3366                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3367                 dma->dma_paddr = 0;
3368         }
3369         if (dma->dma_vaddr != NULL) {
3370                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3371                 dma->dma_vaddr = NULL;
3372         }
3373         bus_dma_tag_destroy(dma->dma_tag);
3374         dma->dma_tag = NULL;
3375 }
3376
3377
3378 /*********************************************************************
3379  *
3380  *  Allocate memory for the transmit and receive rings, and then
3381  *  the descriptors associated with each, called only once at attach.
3382  *
3383  **********************************************************************/
3384 static int
3385 em_allocate_queues(struct adapter *adapter)
3386 {
3387         device_t                dev = adapter->dev;
3388         struct tx_ring          *txr = NULL;
3389         struct rx_ring          *rxr = NULL;
3390         int rsize, tsize, error = E1000_SUCCESS;
3391         int txconf = 0, rxconf = 0;
3392
3393
3394         /* Allocate the TX ring struct memory */
3395         if (!(adapter->tx_rings =
3396             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3397             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3398                 device_printf(dev, "Unable to allocate TX ring memory\n");
3399                 error = ENOMEM;
3400                 goto fail;
3401         }
3402
3403         /* Now allocate the RX */
3404         if (!(adapter->rx_rings =
3405             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3406             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3407                 device_printf(dev, "Unable to allocate RX ring memory\n");
3408                 error = ENOMEM;
3409                 goto rx_fail;
3410         }
3411
3412         tsize = roundup2(adapter->num_tx_desc *
3413             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3414         /*
3415          * Now set up the TX queues, txconf is needed to handle the
3416          * possibility that things fail midcourse and we need to
3417          * undo memory gracefully
3418          */ 
3419         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3420                 /* Set up some basics */
3421                 txr = &adapter->tx_rings[i];
3422                 txr->adapter = adapter;
3423                 txr->me = i;
3424
3425                 /* Initialize the TX lock */
3426                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3427                     device_get_nameunit(dev), txr->me);
3428                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3429
3430                 if (em_dma_malloc(adapter, tsize,
3431                         &txr->txdma, BUS_DMA_NOWAIT)) {
3432                         device_printf(dev,
3433                             "Unable to allocate TX Descriptor memory\n");
3434                         error = ENOMEM;
3435                         goto err_tx_desc;
3436                 }
3437                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3438                 bzero((void *)txr->tx_base, tsize);
3439
3440                 if (em_allocate_transmit_buffers(txr)) {
3441                         device_printf(dev,
3442                             "Critical Failure setting up transmit buffers\n");
3443                         error = ENOMEM;
3444                         goto err_tx_desc;
3445                 }
3446 #if __FreeBSD_version >= 800000
3447                 /* Allocate a buf ring */
3448                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3449                     M_WAITOK, &txr->tx_mtx);
3450 #endif
3451         }
3452
3453         /*
3454          * Next the RX queues...
3455          */ 
3456         rsize = roundup2(adapter->num_rx_desc *
3457             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3458         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3459                 rxr = &adapter->rx_rings[i];
3460                 rxr->adapter = adapter;
3461                 rxr->me = i;
3462
3463                 /* Initialize the RX lock */
3464                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3465                     device_get_nameunit(dev), rxr->me);
3466                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3467
3468                 if (em_dma_malloc(adapter, rsize,
3469                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3470                         device_printf(dev,
3471                             "Unable to allocate RX Descriptor memory\n");
3472                         error = ENOMEM;
3473                         goto err_rx_desc;
3474                 }
3475                 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3476                 bzero((void *)rxr->rx_base, rsize);
3477
3478                 /* Allocate receive buffers for the ring */
3479                 if (em_allocate_receive_buffers(rxr)) {
3480                         device_printf(dev,
3481                             "Critical Failure setting up receive buffers\n");
3482                         error = ENOMEM;
3483                         goto err_rx_desc;
3484                 }
3485         }
3486
3487         return (0);
3488
3489 err_rx_desc:
3490         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3491                 em_dma_free(adapter, &rxr->rxdma);
3492 err_tx_desc:
3493         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3494                 em_dma_free(adapter, &txr->txdma);
3495         free(adapter->rx_rings, M_DEVBUF);
3496 rx_fail:
3497 #if __FreeBSD_version >= 800000
3498         buf_ring_free(txr->br, M_DEVBUF);
3499 #endif
3500         free(adapter->tx_rings, M_DEVBUF);
3501 fail:
3502         return (error);
3503 }
3504
3505
3506 /*********************************************************************
3507  *
3508  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3509  *  the information needed to transmit a packet on the wire. This is
3510  *  called only once at attach, setup is done every reset.
3511  *
3512  **********************************************************************/
3513 static int
3514 em_allocate_transmit_buffers(struct tx_ring *txr)
3515 {
3516         struct adapter *adapter = txr->adapter;
3517         device_t dev = adapter->dev;
3518         struct em_txbuffer *txbuf;
3519         int error, i;
3520
3521         /*
3522          * Setup DMA descriptor areas.
3523          */
3524         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3525                                1, 0,                    /* alignment, bounds */
3526                                BUS_SPACE_MAXADDR,       /* lowaddr */
3527                                BUS_SPACE_MAXADDR,       /* highaddr */
3528                                NULL, NULL,              /* filter, filterarg */
3529                                EM_TSO_SIZE,             /* maxsize */
3530                                EM_MAX_SCATTER,          /* nsegments */
3531                                PAGE_SIZE,               /* maxsegsize */
3532                                0,                       /* flags */
3533                                NULL,                    /* lockfunc */
3534                                NULL,                    /* lockfuncarg */
3535                                &txr->txtag))) {
3536                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3537                 goto fail;
3538         }
3539
3540         if (!(txr->tx_buffers =
3541             (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3542             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3543                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3544                 error = ENOMEM;
3545                 goto fail;
3546         }
3547
3548         /* Create the descriptor buffer dma maps */
3549         txbuf = txr->tx_buffers;
3550         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3551                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3552                 if (error != 0) {
3553                         device_printf(dev, "Unable to create TX DMA map\n");
3554                         goto fail;
3555                 }
3556         }
3557
3558         return 0;
3559 fail:
3560         /* We free all, it handles case where we are in the middle */
3561         em_free_transmit_structures(adapter);
3562         return (error);
3563 }
3564
3565 /*********************************************************************
3566  *
3567  *  Initialize a transmit ring.
3568  *
3569  **********************************************************************/
3570 static void
3571 em_setup_transmit_ring(struct tx_ring *txr)
3572 {
3573         struct adapter *adapter = txr->adapter;
3574         struct em_txbuffer *txbuf;
3575         int i;
3576 #ifdef DEV_NETMAP
3577         struct netmap_slot *slot;
3578         struct netmap_adapter *na = netmap_getna(adapter->ifp);
3579 #endif /* DEV_NETMAP */
3580
3581         /* Clear the old descriptor contents */
3582         EM_TX_LOCK(txr);
3583 #ifdef DEV_NETMAP
3584         slot = netmap_reset(na, NR_TX, txr->me, 0);
3585 #endif /* DEV_NETMAP */
3586
3587         bzero((void *)txr->tx_base,
3588               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3589         /* Reset indices */
3590         txr->next_avail_desc = 0;
3591         txr->next_to_clean = 0;
3592
3593         /* Free any existing tx buffers. */
3594         txbuf = txr->tx_buffers;
3595         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3596                 if (txbuf->m_head != NULL) {
3597                         bus_dmamap_sync(txr->txtag, txbuf->map,
3598                             BUS_DMASYNC_POSTWRITE);
3599                         bus_dmamap_unload(txr->txtag, txbuf->map);
3600                         m_freem(txbuf->m_head);
3601                         txbuf->m_head = NULL;
3602                 }
3603 #ifdef DEV_NETMAP
3604                 if (slot) {
3605                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3606                         uint64_t paddr;
3607                         void *addr;
3608
3609                         addr = PNMB(na, slot + si, &paddr);
3610                         txr->tx_base[i].buffer_addr = htole64(paddr);
3611                         /* reload the map for netmap mode */
3612                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3613                 }
3614 #endif /* DEV_NETMAP */
3615
3616                 /* clear the watch index */
3617                 txbuf->next_eop = -1;
3618         }
3619
3620         /* Set number of descriptors available */
3621         txr->tx_avail = adapter->num_tx_desc;
3622         txr->busy = EM_TX_IDLE;
3623
3624         /* Clear checksum offload context. */
3625         txr->last_hw_offload = 0;
3626         txr->last_hw_ipcss = 0;
3627         txr->last_hw_ipcso = 0;
3628         txr->last_hw_tucss = 0;
3629         txr->last_hw_tucso = 0;
3630
3631         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3632             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3633         EM_TX_UNLOCK(txr);
3634 }
3635
3636 /*********************************************************************
3637  *
3638  *  Initialize all transmit rings.
3639  *
3640  **********************************************************************/
3641 static void
3642 em_setup_transmit_structures(struct adapter *adapter)
3643 {
3644         struct tx_ring *txr = adapter->tx_rings;
3645
3646         for (int i = 0; i < adapter->num_queues; i++, txr++)
3647                 em_setup_transmit_ring(txr);
3648
3649         return;
3650 }
3651
3652 /*********************************************************************
3653  *
3654  *  Enable transmit unit.
3655  *
3656  **********************************************************************/
3657 static void
3658 em_initialize_transmit_unit(struct adapter *adapter)
3659 {
3660         struct tx_ring  *txr = adapter->tx_rings;
3661         struct e1000_hw *hw = &adapter->hw;
3662         u32     tctl, txdctl = 0, tarc, tipg = 0;
3663
3664         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3665
3666         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3667                 u64 bus_addr = txr->txdma.dma_paddr;
3668                 /* Base and Len of TX Ring */
3669                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3670                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3671                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3672                     (u32)(bus_addr >> 32));
3673                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3674                     (u32)bus_addr);
3675                 /* Init the HEAD/TAIL indices */
3676                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3677                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3678
3679                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3680                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3681                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3682
3683                 txr->busy = EM_TX_IDLE;
3684                 txdctl = 0; /* clear txdctl */
3685                 txdctl |= 0x1f; /* PTHRESH */
3686                 txdctl |= 1 << 8; /* HTHRESH */
3687                 txdctl |= 1 << 16; /* WTHRESH */
3688                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3689                 txdctl |= E1000_TXDCTL_GRAN;
3690                 txdctl |= 1 << 25; /* LWTHRESH */
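                /*
                 * Assuming E1000_TXDCTL_GRAN is bit 24, the value written
                 * below works out to 0x0341011F: PTHRESH 31, HTHRESH 1,
                 * WTHRESH 1, descriptor granularity, LWTHRESH 1.
                 */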
3691
3692                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3693         }
3694
3695         /* Set the default values for the Tx Inter Packet Gap timer */
3696         switch (adapter->hw.mac.type) {
3697         case e1000_80003es2lan:
3698                 tipg = DEFAULT_82543_TIPG_IPGR1;
3699                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3700                     E1000_TIPG_IPGR2_SHIFT;
3701                 break;
3702         default:
3703                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3704                     (adapter->hw.phy.media_type ==
3705                     e1000_media_type_internal_serdes))
3706                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3707                 else
3708                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3709                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3710                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3711         }
3712
3713         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3714         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3715
3716         if (adapter->hw.mac.type >= e1000_82540)
3717                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3718                     adapter->tx_abs_int_delay.value);
3719
3720         if ((adapter->hw.mac.type == e1000_82571) ||
3721             (adapter->hw.mac.type == e1000_82572)) {
3722                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3723                 tarc |= TARC_SPEED_MODE_BIT;
3724                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3725         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3726                 /* errata: program both queues to unweighted RR */
3727                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3728                 tarc |= 1;
3729                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3730                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3731                 tarc |= 1;
3732                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3733         } else if (adapter->hw.mac.type == e1000_82574) {
3734                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3735                 tarc |= TARC_ERRATA_BIT;
3736                 if (adapter->num_queues > 1) {
3737                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3738                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3739                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3740                 } else
3741                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3742         }
3743
3744         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3745         if (adapter->tx_int_delay.value > 0)
3746                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3747
3748         /* Program the Transmit Control Register */
3749         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3750         tctl &= ~E1000_TCTL_CT;
3751         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3752                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3753
3754         if (adapter->hw.mac.type >= e1000_82571)
3755                 tctl |= E1000_TCTL_MULR;
3756
3757         /* This write will effectively turn on the transmit unit. */
3758         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3759
3760         if (hw->mac.type == e1000_pch_spt) {
3761                 u32 reg;
3762                 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3763                 reg |= E1000_RCTL_RDMTS_HEX;
3764                 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3765                 reg = E1000_READ_REG(hw, E1000_TARC(0));
3766                 reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3767                 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3768         }
3769 }
3770
3771
3772 /*********************************************************************
3773  *
3774  *  Free all transmit rings.
3775  *
3776  **********************************************************************/
3777 static void
3778 em_free_transmit_structures(struct adapter *adapter)
3779 {
3780         struct tx_ring *txr = adapter->tx_rings;
3781
3782         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3783                 EM_TX_LOCK(txr);
3784                 em_free_transmit_buffers(txr);
3785                 em_dma_free(adapter, &txr->txdma);
3786                 EM_TX_UNLOCK(txr);
3787                 EM_TX_LOCK_DESTROY(txr);
3788         }
3789
3790         free(adapter->tx_rings, M_DEVBUF);
3791 }
3792
3793 /*********************************************************************
3794  *
3795  *  Free transmit ring related data structures.
3796  *
3797  **********************************************************************/
3798 static void
3799 em_free_transmit_buffers(struct tx_ring *txr)
3800 {
3801         struct adapter          *adapter = txr->adapter;
3802         struct em_txbuffer      *txbuf;
3803
3804         INIT_DEBUGOUT("free_transmit_ring: begin");
3805
3806         if (txr->tx_buffers == NULL)
3807                 return;
3808
3809         for (int i = 0; i < adapter->num_tx_desc; i++) {
3810                 txbuf = &txr->tx_buffers[i];
3811                 if (txbuf->m_head != NULL) {
3812                         bus_dmamap_sync(txr->txtag, txbuf->map,
3813                             BUS_DMASYNC_POSTWRITE);
3814                         bus_dmamap_unload(txr->txtag,
3815                             txbuf->map);
3816                         m_freem(txbuf->m_head);
3817                         txbuf->m_head = NULL;
3818                         if (txbuf->map != NULL) {
3819                                 bus_dmamap_destroy(txr->txtag,
3820                                     txbuf->map);
3821                                 txbuf->map = NULL;
3822                         }
3823                 } else if (txbuf->map != NULL) {
3824                         bus_dmamap_unload(txr->txtag,
3825                             txbuf->map);
3826                         bus_dmamap_destroy(txr->txtag,
3827                             txbuf->map);
3828                         txbuf->map = NULL;
3829                 }
3830         }
3831 #if __FreeBSD_version >= 800000
3832         if (txr->br != NULL)
3833                 buf_ring_free(txr->br, M_DEVBUF);
3834 #endif
3835         if (txr->tx_buffers != NULL) {
3836                 free(txr->tx_buffers, M_DEVBUF);
3837                 txr->tx_buffers = NULL;
3838         }
3839         if (txr->txtag != NULL) {
3840                 bus_dma_tag_destroy(txr->txtag);
3841                 txr->txtag = NULL;
3842         }
3843         return;
3844 }
3845
3846
3847 /*********************************************************************
3848  *  The offload context is protocol specific (TCP/UDP) and thus
3849  *  only needs to be set when the protocol changes. The occasion
3850  *  of a context change can be a performance detriment, and
3851  *  might be better just disabled. The reason arises in the way
3852  *  in which the controller supports pipelined requests from the
3853  *  Tx data DMA. Up to four requests can be pipelined, and they may
3854  *  belong to the same packet or to multiple packets. However all
3855  *  requests for one packet are issued before a request is issued
3856  *  for a subsequent packet and if a request for the next packet
3857  *  requires a context change, that request will be stalled
3858  *  until the previous request completes. This means setting up
3859  *  a new context effectively disables pipelined Tx data DMA, which
3860  *  in turn greatly slows down performance when sending small-sized
3861  *  frames.
3862  **********************************************************************/
3863 static void
3864 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3865     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3866 {
3867         struct adapter                  *adapter = txr->adapter;
3868         struct e1000_context_desc       *TXD = NULL;
3869         struct em_txbuffer              *tx_buffer;
3870         int                             cur, hdr_len;
3871         u32                             cmd = 0;
3872         u16                             offload = 0;
3873         u8                              ipcso, ipcss, tucso, tucss;
3874
3875         ipcss = ipcso = tucss = tucso = 0;
3876         hdr_len = ip_off + (ip->ip_hl << 2);
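        /*
         * E.g. an untagged Ethernet frame with a standard IPv4 header:
         * ip_off = 14, ip_hl = 5, so hdr_len = 14 + 20 = 34.
         */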
3877         cur = txr->next_avail_desc;
3878
3879         /* Setup of IP header checksum. */
3880         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3881                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3882                 offload |= CSUM_IP;
3883                 ipcss = ip_off;
3884                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3885                 /*
3886                  * Start offset for header checksum calculation.
3887                  * End offset for header checksum calculation.
3888                  * Offset of place to put the checksum.
3889                  */
3890                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3891                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3892                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3893                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3894                 cmd |= E1000_TXD_CMD_IP;
3895         }
3896
3897         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3898                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3899                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3900                 offload |= CSUM_TCP;
3901                 tucss = hdr_len;
3902                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3903                 /*
3904                  * The 82574L can only remember the *last* context used
3905                  * regardless of the queue it was used for.  We cannot reuse
3906                  * contexts on this hardware platform and must generate a new
3907                  * context every time.  82574L hardware spec, section 7.2.6,
3908                  * second note.
3909                  */
3910                 if (adapter->num_queues < 2) {
3911                         /*
3912                         * Setting up a new checksum offload context for every
3913                         * frame takes a lot of processing time for the hardware.
3914                         * This also reduces performance a lot for small-sized
3915                         * frames, so avoid it if the driver can use the previously
3916                         * configured checksum offload context.
3917                         */
3918                         if (txr->last_hw_offload == offload) {
3919                                 if (offload & CSUM_IP) {
3920                                         if (txr->last_hw_ipcss == ipcss &&
3921                                         txr->last_hw_ipcso == ipcso &&
3922                                         txr->last_hw_tucss == tucss &&
3923                                         txr->last_hw_tucso == tucso)
3924                                                 return;
3925                                 } else {
3926                                         if (txr->last_hw_tucss == tucss &&
3927                                         txr->last_hw_tucso == tucso)
3928                                                 return;
3929                                 }
3930                         }
3931                         txr->last_hw_offload = offload;
3932                         txr->last_hw_tucss = tucss;
3933                         txr->last_hw_tucso = tucso;
3934                 }
3935                 /*
3936                  * Start offset for payload checksum calculation.
3937                  * End offset for payload checksum calculation.
3938                  * Offset of place to put the checksum.
3939                  */
3940                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3941                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3942                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3943                 TXD->upper_setup.tcp_fields.tucso = tucso;
3944                 cmd |= E1000_TXD_CMD_TCP;
3945         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3946                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3947                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3948                 tucss = hdr_len;
3949                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3950                 /*
3951                  * The 82574L can only remember the *last* context used
3952                  * regardless of the queue it was used for.  We cannot reuse
3953                  * contexts on this hardware platform and must generate a new
3954                  * context every time.  82574L hardware spec, section 7.2.6,
3955                  * second note.
3956                  */
3957                 if (adapter->num_queues < 2) {
3958                         /*
3959                         * Setting up a new checksum offload context for every
3960                         * frame takes a lot of processing time for the hardware.
3961                         * This also reduces performance a lot for small-sized
3962                         * frames, so avoid it if the driver can use the previously
3963                         * configured checksum offload context.
3964                         */
3965                         if (txr->last_hw_offload == offload) {
3966                                 if (offload & CSUM_IP) {
3967                                         if (txr->last_hw_ipcss == ipcss &&
3968                                         txr->last_hw_ipcso == ipcso &&
3969                                         txr->last_hw_tucss == tucss &&
3970                                         txr->last_hw_tucso == tucso)
3971                                                 return;
3972                                 } else {
3973                                         if (txr->last_hw_tucss == tucss &&
3974                                         txr->last_hw_tucso == tucso)
3975                                                 return;
3976                                 }
3977                         }
3978                         txr->last_hw_offload = offload;
3979                         txr->last_hw_tucss = tucss;
3980                         txr->last_hw_tucso = tucso;
3981                 }
3982                 /*
3983                  * Start offset for header checksum calculation.
3984                  * End offset for header checksum calculation.
3985                  * Offset of place to put the checksum.
3986                  */
3987                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3988                 TXD->upper_setup.tcp_fields.tucss = tucss;
3989                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3990                 TXD->upper_setup.tcp_fields.tucso = tucso;
3991         }
3992   
3993         if (offload & CSUM_IP) {
3994                 txr->last_hw_ipcss = ipcss;
3995                 txr->last_hw_ipcso = ipcso;
3996         }
3997
3998         TXD->tcp_seg_setup.data = htole32(0);
3999         TXD->cmd_and_length =
4000             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4001         tx_buffer = &txr->tx_buffers[cur];
4002         tx_buffer->m_head = NULL;
4003         tx_buffer->next_eop = -1;
4004
4005         if (++cur == adapter->num_tx_desc)
4006                 cur = 0;
4007
4008         txr->tx_avail--;
4009         txr->next_avail_desc = cur;
4010 }
4011
4012
4013 /**********************************************************************
4014  *
4015  *  Setup work for hardware segmentation offload (TSO)
4016  *
4017  **********************************************************************/
4018 static void
4019 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4020     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4021 {
4022         struct adapter                  *adapter = txr->adapter;
4023         struct e1000_context_desc       *TXD;
4024         struct em_txbuffer              *tx_buffer;
4025         int cur, hdr_len;
4026
4027         /*
4028          * In theory we can use the same TSO context if and only if the
4029          * frame is the same type (IP/TCP) and has the same MSS. However,
4030          * checking whether a frame has the same IP/TCP structure is a
4031          * hard thing, so just ignore that and always reestablish a
4032          * new TSO context.
4033          */
4034         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
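        /*
         * E.g. untagged Ethernet (ip_off 14) + 20-byte IPv4 + 20-byte
         * TCP headers give hdr_len = 54; the MSS programmed below is
         * the payload bytes per segment, excluding these headers.
         */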
4035         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
4036                       E1000_TXD_DTYP_D |        /* Data descr type */
4037                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
4038
4039         /* IP and/or TCP header checksum calculation and insertion. */
4040         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4041
4042         cur = txr->next_avail_desc;
4043         tx_buffer = &txr->tx_buffers[cur];
4044         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4045
4046         /*
4047          * Start offset for header checksum calculation.
4048          * End offset for header checksum calculation.
         * Offset of place to put the checksum.
4050          */
4051         TXD->lower_setup.ip_fields.ipcss = ip_off;
4052         TXD->lower_setup.ip_fields.ipcse =
4053             htole16(ip_off + (ip->ip_hl << 2) - 1);
4054         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4055         /*
4056          * Start offset for payload checksum calculation.
4057          * End offset for payload checksum calculation.
4058          * Offset of place to put the checksum.
4059          */
4060         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4061         TXD->upper_setup.tcp_fields.tucse = 0;
4062         TXD->upper_setup.tcp_fields.tucso =
4063             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
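        /*
         * With the example offsets above (ip_off = 14, ip_hl = 5):
         * ipcss = 14, ipcse = 33, ipcso = 14 + 10 = 24, and
         * tucss = 34, tucso = 34 + 16 = 50.
         */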
4064         /*
4065          * Payload size per packet w/o any headers.
4066          * Length of all headers up to payload.
4067          */
4068         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4069         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4070
4071         TXD->cmd_and_length = htole32(adapter->txd_cmd |
4072                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
4073                                 E1000_TXD_CMD_TSE |     /* TSE context */
4074                                 E1000_TXD_CMD_IP |      /* Do IP csum */
4075                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
4076                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4077
4078         tx_buffer->m_head = NULL;
4079         tx_buffer->next_eop = -1;
4080
4081         if (++cur == adapter->num_tx_desc)
4082                 cur = 0;
4083
4084         txr->tx_avail--;
4085         txr->next_avail_desc = cur;
4086         txr->tx_tso = TRUE;
4087 }
4088
4089
4090 /**********************************************************************
4091  *
4092  *  Examine each tx_buffer in the used queue. If the hardware is done
4093  *  processing the packet then free associated resources. The
4094  *  tx_buffer is put back on the free queue.
4095  *
4096  **********************************************************************/
4097 static void
4098 em_txeof(struct tx_ring *txr)
4099 {
4100         struct adapter  *adapter = txr->adapter;
4101         int first, last, done, processed;
4102         struct em_txbuffer *tx_buffer;
4103         struct e1000_tx_desc   *tx_desc, *eop_desc;
4104         if_t ifp = adapter->ifp;
4105
4106         EM_TX_LOCK_ASSERT(txr);
4107 #ifdef DEV_NETMAP
4108         if (netmap_tx_irq(ifp, txr->me))
4109                 return;
4110 #endif /* DEV_NETMAP */
4111
4112         /* No work, make sure hang detection is disabled */
4113         if (txr->tx_avail == adapter->num_tx_desc) {
4114                 txr->busy = EM_TX_IDLE;
4115                 return;
4116         }
4117
4118         processed = 0;
4119         first = txr->next_to_clean;
4120         tx_desc = &txr->tx_base[first];
4121         tx_buffer = &txr->tx_buffers[first];
4122         last = tx_buffer->next_eop;
4123         eop_desc = &txr->tx_base[last];
4124
        /*
         * Get the index of the first descriptor
         * AFTER the EOP of the first packet, so
         * that we can do a simple comparison in
         * the inner while loop below.
         */
4131         if (++last == adapter->num_tx_desc)
4132                 last = 0;
4133         done = last;
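        /*
         * e.g. with num_tx_desc = 8 and a packet in slots 6..7,
         * last = 7 wraps so done = 0; the inner loop cleans slots
         * 6 and 7 and stops once first reaches 0.
         */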
4134
4135         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4136             BUS_DMASYNC_POSTREAD);
4137
4138         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4139                 /* We clean the range of the packet */
4140                 while (first != done) {
4141                         tx_desc->upper.data = 0;
4142                         tx_desc->lower.data = 0;
4143                         tx_desc->buffer_addr = 0;
4144                         ++txr->tx_avail;
4145                         ++processed;
4146
4147                         if (tx_buffer->m_head) {
4148                                 bus_dmamap_sync(txr->txtag,
4149                                     tx_buffer->map,
4150                                     BUS_DMASYNC_POSTWRITE);
4151                                 bus_dmamap_unload(txr->txtag,
4152                                     tx_buffer->map);
4153                                 m_freem(tx_buffer->m_head);
4154                                 tx_buffer->m_head = NULL;
4155                         }
4156                         tx_buffer->next_eop = -1;
4157
4158                         if (++first == adapter->num_tx_desc)
4159                                 first = 0;
4160
4161                         tx_buffer = &txr->tx_buffers[first];
4162                         tx_desc = &txr->tx_base[first];
4163                 }
4164                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4165                 /* See if we can continue to the next packet */
4166                 last = tx_buffer->next_eop;
4167                 if (last != -1) {
4168                         eop_desc = &txr->tx_base[last];
4169                         /* Get new done point */
4170                         if (++last == adapter->num_tx_desc) last = 0;
4171                         done = last;
4172                 } else
4173                         break;
4174         }
4175         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4176             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4177
4178         txr->next_to_clean = first;
4179
        /*
        ** Hang detection: we know there's work outstanding
        ** or the early return at entry would have been taken, so
        ** no descriptor processed here indicates a potential hang.
        ** The local timer will examine this and do a reset if needed.
        */
4186         if (processed == 0) {
4187                 if (txr->busy != EM_TX_HUNG)
4188                         ++txr->busy;
4189         } else /* At least one descriptor was cleaned */
4190                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4191
4192         /*
4193          * If we have a minimum free, clear IFF_DRV_OACTIVE
4194          * to tell the stack that it is OK to send packets.
4195          * Notice that all writes of OACTIVE happen under the
4196          * TX lock which, with a single queue, guarantees 
4197          * sanity.
4198          */
4199         if (txr->tx_avail >= EM_MAX_SCATTER) {
4200                 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4201         }
4202
4203         /* Disable hang detection if all clean */
4204         if (txr->tx_avail == adapter->num_tx_desc)
4205                 txr->busy = EM_TX_IDLE;
4206 }
4207
4208 /*********************************************************************
4209  *
4210  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4211  *
4212  **********************************************************************/
4213 static void
4214 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4215 {
4216         struct adapter          *adapter = rxr->adapter;
4217         struct mbuf             *m;
4218         bus_dma_segment_t       segs;
4219         struct em_rxbuffer      *rxbuf;
4220         int                     i, j, error, nsegs;
4221         bool                    cleaned = FALSE;
4222
4223         i = j = rxr->next_to_refresh;
4224         /*
4225         ** Get one descriptor beyond
4226         ** our work mark to control
4227         ** the loop.
4228         */
4229         if (++j == adapter->num_rx_desc)
4230                 j = 0;
4231
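        /*
        ** i is the slot being refreshed; j runs one ahead so the
        ** loop stops before reaching 'limit', the next descriptor
        ** em_rxeof will examine.
        */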
4232         while (j != limit) {
4233                 rxbuf = &rxr->rx_buffers[i];
4234                 if (rxbuf->m_head == NULL) {
4235                         m = m_getjcl(M_NOWAIT, MT_DATA,
4236                             M_PKTHDR, adapter->rx_mbuf_sz);
4237                         /*
4238                         ** If we have a temporary resource shortage
                        ** that causes a failure, just abort the refresh
                        ** for now; we will return to this point when
4241                         ** reinvoked from em_rxeof.
4242                         */
4243                         if (m == NULL)
4244                                 goto update;
4245                 } else
4246                         m = rxbuf->m_head;
4247
4248                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4249                 m->m_flags |= M_PKTHDR;
4250                 m->m_data = m->m_ext.ext_buf;
4251
4252                 /* Use bus_dma machinery to setup the memory mapping  */
4253                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4254                     m, &segs, &nsegs, BUS_DMA_NOWAIT);
4255                 if (error != 0) {
4256                         printf("Refresh mbufs: hdr dmamap load"
4257                             " failure - %d\n", error);
4258                         m_free(m);
4259                         rxbuf->m_head = NULL;
4260                         goto update;
4261                 }
4262                 rxbuf->m_head = m;
4263                 rxbuf->paddr = segs.ds_addr;
4264                 bus_dmamap_sync(rxr->rxtag,
4265                     rxbuf->map, BUS_DMASYNC_PREREAD);
4266                 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4267                 cleaned = TRUE;
4268
                i = j; /* Next is precalculated for us */
4270                 rxr->next_to_refresh = i;
4271                 /* Calculate next controlling index */
4272                 if (++j == adapter->num_rx_desc)
4273                         j = 0;
4274         }
4275 update:
        /*
        ** Update the tail pointer only if, and
        ** only as far as, we have refreshed.
        */
4280         if (cleaned)
4281                 E1000_WRITE_REG(&adapter->hw,
4282                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4283
4284         return;
4285 }
4286
4287
4288 /*********************************************************************
4289  *
4290  *  Allocate memory for rx_buffer structures. Since we use one
4291  *  rx_buffer per received packet, the maximum number of rx_buffer's
4292  *  that we'll need is equal to the number of receive descriptors
4293  *  that we've allocated.
4294  *
4295  **********************************************************************/
4296 static int
4297 em_allocate_receive_buffers(struct rx_ring *rxr)
4298 {
4299         struct adapter          *adapter = rxr->adapter;
4300         device_t                dev = adapter->dev;
4301         struct em_rxbuffer      *rxbuf;
4302         int                     error;
4303
4304         rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4305             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4306         if (rxr->rx_buffers == NULL) {
4307                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4308                 return (ENOMEM);
4309         }
4310
4311         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4312                                 1, 0,                   /* alignment, bounds */
4313                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4314                                 BUS_SPACE_MAXADDR,      /* highaddr */
4315                                 NULL, NULL,             /* filter, filterarg */
4316                                 MJUM9BYTES,             /* maxsize */
4317                                 1,                      /* nsegments */
4318                                 MJUM9BYTES,             /* maxsegsize */
4319                                 0,                      /* flags */
4320                                 NULL,                   /* lockfunc */
4321                                 NULL,                   /* lockarg */
4322                                 &rxr->rxtag);
4323         if (error) {
4324                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4325                     __func__, error);
4326                 goto fail;
4327         }
4328
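        /*
         * One DMA map per descriptor; the tag above allows a single
         * contiguous segment of up to MJUM9BYTES (a 9KB jumbo
         * cluster), the largest receive buffer this driver uses.
         */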
        rxbuf = rxr->rx_buffers;
        for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4332                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4333                 if (error) {
4334                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4335                             __func__, error);
4336                         goto fail;
4337                 }
4338         }
4339
4340         return (0);
4341
4342 fail:
4343         em_free_receive_structures(adapter);
4344         return (error);
4345 }
4346
4347
4348 /*********************************************************************
4349  *
4350  *  Initialize a receive ring and its buffers.
4351  *
4352  **********************************************************************/
4353 static int
4354 em_setup_receive_ring(struct rx_ring *rxr)
4355 {
4356         struct  adapter         *adapter = rxr->adapter;
4357         struct em_rxbuffer      *rxbuf;
4358         bus_dma_segment_t       seg[1];
4359         int                     rsize, nsegs, error = 0;
4360 #ifdef DEV_NETMAP
4361         struct netmap_slot *slot;
4362         struct netmap_adapter *na = netmap_getna(adapter->ifp);
4363 #endif
4364
4365
4366         /* Clear the ring contents */
4367         EM_RX_LOCK(rxr);
4368         rsize = roundup2(adapter->num_rx_desc *
4369             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4370         bzero((void *)rxr->rx_base, rsize);
4371 #ifdef DEV_NETMAP
4372         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4373 #endif
4374
4375         /*
4376         ** Free current RX buffer structs and their mbufs
4377         */
4378         for (int i = 0; i < adapter->num_rx_desc; i++) {
4379                 rxbuf = &rxr->rx_buffers[i];
4380                 if (rxbuf->m_head != NULL) {
4381                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4382                             BUS_DMASYNC_POSTREAD);
4383                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4384                         m_freem(rxbuf->m_head);
4385                         rxbuf->m_head = NULL; /* mark as freed */
4386                 }
4387         }
4388
4389         /* Now replenish the mbufs */
4390         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4391                 rxbuf = &rxr->rx_buffers[j];
4392 #ifdef DEV_NETMAP
4393                 if (slot) {
4394                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4395                         uint64_t paddr;
4396                         void *addr;
4397
4398                         addr = PNMB(na, slot + si, &paddr);
4399                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4400                         rxbuf->paddr = paddr;
4401                         em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4402                         continue;
4403                 }
4404 #endif /* DEV_NETMAP */
4405                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4406                     M_PKTHDR, adapter->rx_mbuf_sz);
4407                 if (rxbuf->m_head == NULL) {
4408                         error = ENOBUFS;
4409                         goto fail;
4410                 }
4411                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4412                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4413                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4414
4415                 /* Get the memory mapping */
4416                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4417                     rxbuf->map, rxbuf->m_head, seg,
4418                     &nsegs, BUS_DMA_NOWAIT);
4419                 if (error != 0) {
4420                         m_freem(rxbuf->m_head);
4421                         rxbuf->m_head = NULL;
4422                         goto fail;
4423                 }
4424                 bus_dmamap_sync(rxr->rxtag,
4425                     rxbuf->map, BUS_DMASYNC_PREREAD);
4426
4427                 rxbuf->paddr = seg[0].ds_addr;
4428                 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4429         }
4430         rxr->next_to_check = 0;
4431         rxr->next_to_refresh = 0;
4432         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4433             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4434
4435 fail:
4436         EM_RX_UNLOCK(rxr);
4437         return (error);
4438 }
4439
4440 /*********************************************************************
4441  *
4442  *  Initialize all receive rings.
4443  *
4444  **********************************************************************/
4445 static int
4446 em_setup_receive_structures(struct adapter *adapter)
4447 {
4448         struct rx_ring *rxr = adapter->rx_rings;
4449         int q;
4450
4451         for (q = 0; q < adapter->num_queues; q++, rxr++)
4452                 if (em_setup_receive_ring(rxr))
4453                         goto fail;
4454
4455         return (0);
4456 fail:
        /*
         * Free RX buffers allocated so far; we only handle the
         * rings that completed, since the failing case will have
         * cleaned up for itself. 'q' failed, so it's the terminus.
         */
4462         for (int i = 0; i < q; ++i) {
4463                 rxr = &adapter->rx_rings[i];
4464                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4465                         struct em_rxbuffer *rxbuf;
4466                         rxbuf = &rxr->rx_buffers[n];
4467                         if (rxbuf->m_head != NULL) {
4468                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4469                                   BUS_DMASYNC_POSTREAD);
4470                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4471                                 m_freem(rxbuf->m_head);
4472                                 rxbuf->m_head = NULL;
4473                         }
4474                 }
4475                 rxr->next_to_check = 0;
4476                 rxr->next_to_refresh = 0;
4477         }
4478
4479         return (ENOBUFS);
4480 }
4481
4482 /*********************************************************************
4483  *
4484  *  Free all receive rings.
4485  *
4486  **********************************************************************/
4487 static void
4488 em_free_receive_structures(struct adapter *adapter)
4489 {
4490         struct rx_ring *rxr = adapter->rx_rings;
4491
4492         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4493                 em_free_receive_buffers(rxr);
4494                 /* Free the ring memory as well */
4495                 em_dma_free(adapter, &rxr->rxdma);
4496                 EM_RX_LOCK_DESTROY(rxr);
4497         }
4498
4499         free(adapter->rx_rings, M_DEVBUF);
4500 }
4501
4502
4503 /*********************************************************************
4504  *
4505  *  Free receive ring data structures
4506  *
4507  **********************************************************************/
4508 static void
4509 em_free_receive_buffers(struct rx_ring *rxr)
4510 {
4511         struct adapter          *adapter = rxr->adapter;
4512         struct em_rxbuffer      *rxbuf = NULL;
4513
4514         INIT_DEBUGOUT("free_receive_buffers: begin");
4515
4516         if (rxr->rx_buffers != NULL) {
4517                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4518                         rxbuf = &rxr->rx_buffers[i];
4519                         if (rxbuf->map != NULL) {
4520                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4521                                     BUS_DMASYNC_POSTREAD);
4522                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4523                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4524                         }
4525                         if (rxbuf->m_head != NULL) {
4526                                 m_freem(rxbuf->m_head);
4527                                 rxbuf->m_head = NULL;
4528                         }
4529                 }
4530                 free(rxr->rx_buffers, M_DEVBUF);
4531                 rxr->rx_buffers = NULL;
4532                 rxr->next_to_check = 0;
4533                 rxr->next_to_refresh = 0;
4534         }
4535
4536         if (rxr->rxtag != NULL) {
4537                 bus_dma_tag_destroy(rxr->rxtag);
4538                 rxr->rxtag = NULL;
4539         }
4540
4541         return;
4542 }
4543
4544
4545 /*********************************************************************
4546  *
4547  *  Enable receive unit.
4548  *
4549  **********************************************************************/
4550
4551 static void
4552 em_initialize_receive_unit(struct adapter *adapter)
4553 {
4554         struct rx_ring *rxr = adapter->rx_rings;
4555         if_t ifp = adapter->ifp;
4556         struct e1000_hw *hw = &adapter->hw;
4557         u32     rctl, rxcsum, rfctl;
4558
4559         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4560
4561         /*
4562          * Make sure receives are disabled while setting
4563          * up the descriptor ring
4564          */
4565         rctl = E1000_READ_REG(hw, E1000_RCTL);
4566         /* Do not disable if ever enabled on this hardware */
4567         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4568                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4569
4570         /* Setup the Receive Control Register */
4571         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4572         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4573             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4574             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4575
4576         /* Do not store bad packets */
4577         rctl &= ~E1000_RCTL_SBP;
4578
4579         /* Enable Long Packet receive */
4580         if (if_getmtu(ifp) > ETHERMTU)
4581                 rctl |= E1000_RCTL_LPE;
4582         else
4583                 rctl &= ~E1000_RCTL_LPE;
4584
4585         /* Strip the CRC */
4586         if (!em_disable_crc_stripping)
4587                 rctl |= E1000_RCTL_SECRC;
4588
4589         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4590             adapter->rx_abs_int_delay.value);
4591
4592         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4593             adapter->rx_int_delay.value);
4594         /*
4595          * Set the interrupt throttling rate. Value is calculated
4596          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4597          */
4598         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
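        /*
         * e.g. assuming MAX_INTS_PER_SEC is 8000, the 256ns ITR
         * granularity gives DEFAULT_ITR = 1000000000/(8000 * 256),
         * roughly 488, i.e. one interrupt every ~125us at most.
         */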
4599
4600         /* Use extended rx descriptor formats */
4601         rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4602         rfctl |= E1000_RFCTL_EXTEN;
        /*
        ** When using MSI-X interrupts we need to throttle
        ** using the EITR registers (82574 only)
        */
4607         if (hw->mac.type == e1000_82574) {
4608                 for (int i = 0; i < 4; i++)
4609                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4610                             DEFAULT_ITR);
4611                 /* Disable accelerated acknowledge */
4612                 rfctl |= E1000_RFCTL_ACK_DIS;
4613         }
4614         E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4615
4616         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4617         if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4618 #ifdef EM_MULTIQUEUE
4619                 rxcsum |= E1000_RXCSUM_TUOFL |
4620                           E1000_RXCSUM_IPOFL |
4621                           E1000_RXCSUM_PCSD;
4622 #else
4623                 rxcsum |= E1000_RXCSUM_TUOFL;
4624 #endif
4625         } else
4626                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4627
4628         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4629
4630 #ifdef EM_MULTIQUEUE
4631 #define RSSKEYLEN 10
4632         if (adapter->num_queues > 1) {
4633                 uint8_t  rss_key[4 * RSSKEYLEN];
4634                 uint32_t reta = 0;
4635                 int i;
4636
                /*
                 * Configure RSS key.
                 */
4640                 arc4rand(rss_key, sizeof(rss_key), 0);
4641                 for (i = 0; i < RSSKEYLEN; ++i) {
4642                         uint32_t rssrk = 0;
4643
4644                         rssrk = EM_RSSRK_VAL(rss_key, i);
4645                         E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4646                 }
4647
                /*
                 * Configure the RSS redirect table in the following
                 * fashion:
                 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
                 */
4652                 for (i = 0; i < sizeof(reta); ++i) {
4653                         uint32_t q;
4654
4655                         q = (i % adapter->num_queues) << 7;
4656                         reta |= q << (8 * i);
4657                 }
4658
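                /*
                 * e.g. with num_queues = 2 the four bytes of 'reta'
                 * alternate 0x00/0x80 (the queue index lives in bit 7
                 * of each byte), so reta = 0x80008000, replicated into
                 * all 32 RETA registers below.
                 */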
4659                 for (i = 0; i < 32; ++i) {
4660                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4661                 }
4662
4663                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4664                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4665                                 E1000_MRQC_RSS_FIELD_IPV4 |
4666                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4667                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4668                                 E1000_MRQC_RSS_FIELD_IPV6);
4669         }
4670 #endif
4671         /*
4672         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4673         ** long latencies are observed, like Lenovo X60. This
4674         ** change eliminates the problem, but since having positive
4675         ** values in RDTR is a known source of problems on other
4676         ** platforms another solution is being sought.
4677         */
4678         if (hw->mac.type == e1000_82573)
4679                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4680
4681         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4682                 /* Setup the Base and Length of the Rx Descriptor Ring */
4683                 u64 bus_addr = rxr->rxdma.dma_paddr;
4684                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4685
4686                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4687                     adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4688                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4689                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4690                 /* Setup the Head and Tail Descriptor Pointers */
4691                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4692 #ifdef DEV_NETMAP
4693                 /*
4694                  * an init() while a netmap client is active must
4695                  * preserve the rx buffers passed to userspace.
4696                  */
4697                 if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4698                         struct netmap_adapter *na = netmap_getna(adapter->ifp);
4699                         rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4700                 }
4701 #endif /* DEV_NETMAP */
4702                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4703         }
4704
4705         /*
4706          * Set PTHRESH for improved jumbo performance
4707          * According to 10.2.5.11 of Intel 82574 Datasheet,
4708          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4709          * Only write to RXDCTL(1) if there is a need for different
4710          * settings.
4711          */
4712         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4713             (adapter->hw.mac.type == e1000_pch2lan) ||
4714             (adapter->hw.mac.type == e1000_ich10lan)) &&
4715             (if_getmtu(ifp) > ETHERMTU)) {
4716                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4717                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4718         } else if (adapter->hw.mac.type == e1000_82574) {
4719                 for (int i = 0; i < adapter->num_queues; i++) {
4720                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4721
4722                         rxdctl |= 0x20; /* PTHRESH */
4723                         rxdctl |= 4 << 8; /* HTHRESH */
4724                         rxdctl |= 4 << 16;/* WTHRESH */
                        rxdctl |= 1 << 24; /* Switch thresholds to descriptor granularity */
4726                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4727                 }
4728         }
4729                 
4730         if (adapter->hw.mac.type >= e1000_pch2lan) {
4731                 if (if_getmtu(ifp) > ETHERMTU)
4732                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4733                 else
4734                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4735         }
4736
4737         /* Make sure VLAN Filters are off */
4738         rctl &= ~E1000_RCTL_VFE;
4739
4740         if (adapter->rx_mbuf_sz == MCLBYTES)
4741                 rctl |= E1000_RCTL_SZ_2048;
4742         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4743                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4744         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4745                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
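        /*
         * BSEX scales the RCTL buffer-size encoding by 16, so e.g.
         * SZ_4096 | BSEX selects 4096-byte buffers; plain SZ_2048
         * matches ordinary MCLBYTES clusters.
         */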
4746
        /* Clear RCTL.DTYP to ensure we use a DTYPE of 00 here */
4748         rctl &= ~0x00000C00;
4749         /* Write out the settings */
4750         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4751
4752         return;
4753 }
4754
4755
4756 /*********************************************************************
4757  *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has
 *  been dma'ed into host memory up to the stack.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  For polling we also return the number of cleaned packets via
 *  the 'done' argument.
4766  *********************************************************************/
4767 static bool
4768 em_rxeof(struct rx_ring *rxr, int count, int *done)
4769 {
4770         struct adapter          *adapter = rxr->adapter;
4771         if_t ifp = adapter->ifp;
4772         struct mbuf             *mp, *sendmp;
4773         u32                     status = 0;
4774         u16                     len;
4775         int                     i, processed, rxdone = 0;
4776         bool                    eop;
4777         union e1000_rx_desc_extended    *cur;
4778
4779         EM_RX_LOCK(rxr);
4780
4781         /* Sync the ring */
4782         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4783             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4784
4785
4786 #ifdef DEV_NETMAP
4787         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4788                 EM_RX_UNLOCK(rxr);
4789                 return (FALSE);
4790         }
4791 #endif /* DEV_NETMAP */
4792
4793         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4794                 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4795                         break;
4796
4797                 cur = &rxr->rx_base[i];
4798                 status = le32toh(cur->wb.upper.status_error);
4799                 mp = sendmp = NULL;
4800
4801                 if ((status & E1000_RXD_STAT_DD) == 0)
4802                         break;
4803
4804                 len = le16toh(cur->wb.upper.length);
4805                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4806
4807                 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4808                     (rxr->discard == TRUE)) {
4809                         adapter->dropped_pkts++;
4810                         ++rxr->rx_discarded;
4811                         if (!eop) /* Catch subsequent segs */
4812                                 rxr->discard = TRUE;
4813                         else
4814                                 rxr->discard = FALSE;
4815                         em_rx_discard(rxr, i);
4816                         goto next_desc;
4817                 }
4818                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4819
4820                 /* Assign correct length to the current fragment */
4821                 mp = rxr->rx_buffers[i].m_head;
4822                 mp->m_len = len;
4823
4824                 /* Trigger for refresh */
4825                 rxr->rx_buffers[i].m_head = NULL;
4826
4827                 /* First segment? */
4828                 if (rxr->fmp == NULL) {
4829                         mp->m_pkthdr.len = len;
4830                         rxr->fmp = rxr->lmp = mp;
4831                 } else {
4832                         /* Chain mbuf's together */
4833                         mp->m_flags &= ~M_PKTHDR;
4834                         rxr->lmp->m_next = mp;
4835                         rxr->lmp = mp;
4836                         rxr->fmp->m_pkthdr.len += len;
4837                 }
4838
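                /*
                 * fmp heads the frame being assembled and lmp is its
                 * last fragment; pkthdr.len already covers all data
                 * received so far for this frame.
                 */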
4839                 if (eop) {
4840                         --count;
4841                         sendmp = rxr->fmp;
4842                         if_setrcvif(sendmp, ifp);
4843                         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4844                         em_receive_checksum(status, sendmp);
4845 #ifndef __NO_STRICT_ALIGNMENT
4846                         if (adapter->hw.mac.max_frame_size >
4847                             (MCLBYTES - ETHER_ALIGN) &&
4848                             em_fixup_rx(rxr) != 0)
4849                                 goto skip;
4850 #endif
4851                         if (status & E1000_RXD_STAT_VP) {
4852                                 if_setvtag(sendmp, 
4853                                     le16toh(cur->wb.upper.vlan));
4854                                 sendmp->m_flags |= M_VLANTAG;
4855                         }
4856 #ifndef __NO_STRICT_ALIGNMENT
4857 skip:
4858 #endif
4859                         rxr->fmp = rxr->lmp = NULL;
4860                 }
4861 next_desc:
4862                 /* Sync the ring */
4863                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4864                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4865
4866                 /* Zero out the receive descriptors status. */
4867                 cur->wb.upper.status_error &= htole32(~0xFF);
4868                 ++rxdone;       /* cumulative for POLL */
4869                 ++processed;
4870
4871                 /* Advance our pointers to the next descriptor. */
4872                 if (++i == adapter->num_rx_desc)
4873                         i = 0;
4874
4875                 /* Send to the stack */
4876                 if (sendmp != NULL) {
4877                         rxr->next_to_check = i;
4878                         EM_RX_UNLOCK(rxr);
4879                         if_input(ifp, sendmp);
4880                         EM_RX_LOCK(rxr);
4881                         i = rxr->next_to_check;
4882                 }
4883
4884                 /* Only refresh mbufs every 8 descriptors */
4885                 if (processed == 8) {
4886                         em_refresh_mbufs(rxr, i);
4887                         processed = 0;
4888                 }
4889         }
4890
4891         /* Catch any remaining refresh work */
4892         if (e1000_rx_unrefreshed(rxr))
4893                 em_refresh_mbufs(rxr, i);
4894
4895         rxr->next_to_check = i;
4896         if (done != NULL)
4897                 *done = rxdone;
4898         EM_RX_UNLOCK(rxr);
4899
4900         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4901 }
4902
4903 static __inline void
4904 em_rx_discard(struct rx_ring *rxr, int i)
4905 {
4906         struct em_rxbuffer      *rbuf;
4907
4908         rbuf = &rxr->rx_buffers[i];
4909         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4910
4911         /* Free any previous pieces */
4912         if (rxr->fmp != NULL) {
4913                 rxr->fmp->m_flags |= M_PKTHDR;
4914                 m_freem(rxr->fmp);
4915                 rxr->fmp = NULL;
4916                 rxr->lmp = NULL;
4917         }
4918         /*
4919         ** Free buffer and allow em_refresh_mbufs()
4920         ** to clean up and recharge buffer.
4921         */
4922         if (rbuf->m_head) {
4923                 m_free(rbuf->m_head);
4924                 rbuf->m_head = NULL;
4925         }
4926         return;
4927 }
4928
4929 #ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x as it nullifies the benefit of DMA. The 8254x only allows the
 * RX buffer size to be 2048/4096/8192/16384. What we really want is
 * 2048 - ETHER_ALIGN to align its payload. On architectures without strict
 * alignment restrictions the 8254x still performs unaligned memory accesses,
 * which reduce performance too.
 * To avoid copying an entire frame to realign it, we allocate a new mbuf and
 * copy just the ethernet header into it. The new mbuf is then prepended to
 * the existing mbuf chain.
 *
 * Be aware, the best performance of the 8254x is achieved only when jumbo
 * frames are not used at all on architectures with strict alignment.
 */
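/*
 * e.g. a frame whose IP header lands on a 2-byte boundary becomes
 * 4-byte aligned once the 14-byte ethernet header is peeled off into
 * its own mbuf and m_data advances by ETHER_HDR_LEN (14 mod 4 == 2).
 */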
4944 static int
4945 em_fixup_rx(struct rx_ring *rxr)
4946 {
4947         struct adapter *adapter = rxr->adapter;
4948         struct mbuf *m, *n;
4949         int error;
4950
4951         error = 0;
4952         m = rxr->fmp;
4953         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4954                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4955                 m->m_data += ETHER_HDR_LEN;
4956         } else {
4957                 MGETHDR(n, M_NOWAIT, MT_DATA);
4958                 if (n != NULL) {
4959                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4960                         m->m_data += ETHER_HDR_LEN;
4961                         m->m_len -= ETHER_HDR_LEN;
4962                         n->m_len = ETHER_HDR_LEN;
4963                         M_MOVE_PKTHDR(n, m);
4964                         n->m_next = m;
4965                         rxr->fmp = n;
4966                 } else {
4967                         adapter->dropped_pkts++;
4968                         m_freem(rxr->fmp);
4969                         rxr->fmp = NULL;
4970                         error = ENOMEM;
4971                 }
4972         }
4973
4974         return (error);
4975 }
4976 #endif
4977
4978 static void
4979 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4980 {
4981         rxd->read.buffer_addr = htole64(rxbuf->paddr);
4982         /* DD bits must be cleared */
        rxd->wb.upper.status_error = 0;
4984 }
4985
4986 /*********************************************************************
4987  *
4988  *  Verify that the hardware indicated that the checksum is valid.
4989  *  Inform the stack about the status of checksum so that stack
4990  *  doesn't spend time verifying the checksum.
4991  *
4992  *********************************************************************/
4993 static void
4994 em_receive_checksum(uint32_t status, struct mbuf *mp)
4995 {
4996         mp->m_pkthdr.csum_flags = 0;
4997
4998         /* Ignore Checksum bit is set */
4999         if (status & E1000_RXD_STAT_IXSM)
5000                 return;
5001
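        /*
         * Each masked compare below requires both that the hardware
         * performed the offload (the xxCS status bit is set) and
         * that it reported no error (the matching error bit clear).
         */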
5002         /* If the IP checksum exists and there is no IP Checksum error */
5003         if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5004                 E1000_RXD_STAT_IPCS) {
5005                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5006         }
5007
5008         /* TCP or UDP checksum */
5009         if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5010             E1000_RXD_STAT_TCPCS) {
5011                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5012                 mp->m_pkthdr.csum_data = htons(0xffff);
5013         }
5014         if (status & E1000_RXD_STAT_UDPCS) {
5015                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5016                 mp->m_pkthdr.csum_data = htons(0xffff);
5017         }
5018 }
5019
5020 /*
 * This routine is run via a vlan
5022  * config EVENT
5023  */
5024 static void
5025 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5026 {
5027         struct adapter  *adapter = if_getsoftc(ifp);
5028         u32             index, bit;
5029
        if ((void *)adapter != arg)     /* Not our event */
5031                 return;
5032
5033         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5034                 return;
5035
5036         EM_CORE_LOCK(adapter);
5037         index = (vtag >> 5) & 0x7F;
5038         bit = vtag & 0x1F;
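        /* e.g. vtag 100: index = (100 >> 5) & 0x7F = 3, bit = 100 & 0x1F = 4 */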
5039         adapter->shadow_vfta[index] |= (1 << bit);
5040         ++adapter->num_vlans;
5041         /* Re-init to load the changes */
5042         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5043                 em_init_locked(adapter);
5044         EM_CORE_UNLOCK(adapter);
5045 }
5046
5047 /*
 * This routine is run via a vlan
5049  * unconfig EVENT
5050  */
5051 static void
5052 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5053 {
5054         struct adapter  *adapter = if_getsoftc(ifp);
5055         u32             index, bit;
5056
5057         if (adapter != arg)
5058                 return;
5059
5060         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5061                 return;
5062
5063         EM_CORE_LOCK(adapter);
5064         index = (vtag >> 5) & 0x7F;
5065         bit = vtag & 0x1F;
5066         adapter->shadow_vfta[index] &= ~(1 << bit);
5067         --adapter->num_vlans;
5068         /* Re-init to load the changes */
5069         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5070                 em_init_locked(adapter);
5071         EM_CORE_UNLOCK(adapter);
5072 }
5073
5074 static void
5075 em_setup_vlan_hw_support(struct adapter *adapter)
5076 {
5077         struct e1000_hw *hw = &adapter->hw;
5078         u32             reg;
5079
        /*
        ** We get here thru init_locked, meaning
        ** a soft reset, which has already cleared
        ** the VFTA and other state, so if there
        ** have been no vlans registered do nothing.
        */
5086         if (adapter->num_vlans == 0)
5087                 return;
5088
5089         /*
        ** A soft reset zeroes out the VFTA, so
5091         ** we need to repopulate it now.
5092         */
5093         for (int i = 0; i < EM_VFTA_SIZE; i++)
5094                 if (adapter->shadow_vfta[i] != 0)
5095                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5096                             i, adapter->shadow_vfta[i]);
5097
5098         reg = E1000_READ_REG(hw, E1000_CTRL);
5099         reg |= E1000_CTRL_VME;
5100         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5101
5102         /* Enable the Filter Table */
5103         reg = E1000_READ_REG(hw, E1000_RCTL);
5104         reg &= ~E1000_RCTL_CFIEN;
5105         reg |= E1000_RCTL_VFE;
5106         E1000_WRITE_REG(hw, E1000_RCTL, reg);
5107 }
5108
5109 static void
5110 em_enable_intr(struct adapter *adapter)
5111 {
5112         struct e1000_hw *hw = &adapter->hw;
5113         u32 ims_mask = IMS_ENABLE_MASK;
5114
5115         if (hw->mac.type == e1000_82574) {
5116                 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5117                 ims_mask |= adapter->ims;
5118         } 
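        /*
         * On the 82574 the MSI-X cause bits in adapter->ims are also
         * written to EIAC so they auto-clear when a vector fires,
         * sparing the handlers an explicit ICR write.
         */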
5119         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5120 }
5121
5122 static void
5123 em_disable_intr(struct adapter *adapter)
5124 {
5125         struct e1000_hw *hw = &adapter->hw;
5126
5127         if (hw->mac.type == e1000_82574)
5128                 E1000_WRITE_REG(hw, EM_EIAC, 0);
5129         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5130 }
5131
/*
 * Bit of a misnomer: what this really means is
 * to enable OS management of the system, i.e.
 * to disable certain special hardware management
 * features.
 */
5137 static void
5138 em_init_manageability(struct adapter *adapter)
5139 {
5140         /* A shared code workaround */
5141 #define E1000_82542_MANC2H E1000_MANC2H
5142         if (adapter->has_manage) {
5143                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5144                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5145
5146                 /* disable hardware interception of ARP */
5147                 manc &= ~(E1000_MANC_ARP_EN);
5148
5149                 /* enable receiving management packets to the host */
5150                 manc |= E1000_MANC_EN_MNG2HOST;
5151 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5152 #define E1000_MNG2HOST_PORT_664 (1 << 6)
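                /*
                 * 623 and 664 are the RMCP/ASF management ports;
                 * these MANC2H bits route packets arriving on those
                 * ports up to the host.
                 */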
5153                 manc2h |= E1000_MNG2HOST_PORT_623;
5154                 manc2h |= E1000_MNG2HOST_PORT_664;
5155                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5156                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5157         }
5158 }
5159
5160 /*
5161  * Give control back to hardware management
5162  * controller if there is one.
5163  */
5164 static void
5165 em_release_manageability(struct adapter *adapter)
5166 {
5167         if (adapter->has_manage) {
5168                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5169
5170                 /* re-enable hardware interception of ARP */
5171                 manc |= E1000_MANC_ARP_EN;
5172                 manc &= ~E1000_MANC_EN_MNG2HOST;
5173
5174                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5175         }
5176 }
5177
5178 /*
5179  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5180  * For ASF and Pass Through versions of f/w this means
5181  * that the driver is loaded. For AMT version type f/w
5182  * this means that the network i/f is open.
5183  */
5184 static void
5185 em_get_hw_control(struct adapter *adapter)
5186 {
5187         u32 ctrl_ext, swsm;
5188
5189         if (adapter->hw.mac.type == e1000_82573) {
5190                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5191                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5192                     swsm | E1000_SWSM_DRV_LOAD);
5193                 return;
5194         }
5195         /* else */
5196         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5197         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5198             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5199         return;
5200 }
5201
5202 /*
5203  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5204  * For ASF and Pass Through versions of f/w this means that
5205  * the driver is no longer loaded. For AMT versions of the
5206  * f/w this means that the network i/f is closed.
5207  */
5208 static void
5209 em_release_hw_control(struct adapter *adapter)
5210 {
5211         u32 ctrl_ext, swsm;
5212
5213         if (!adapter->has_manage)
5214                 return;
5215
5216         if (adapter->hw.mac.type == e1000_82573) {
5217                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5218                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5219                     swsm & ~E1000_SWSM_DRV_LOAD);
5220                 return;
5221         }
5222         /* else */
5223         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5224         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5225             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5226         return;
5227 }
5228
5229 static int
5230 em_is_valid_ether_addr(u8 *addr)
5231 {
5232         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5233
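        /*
         * (addr[0] & 1) rejects multicast/broadcast addresses, since
         * the I/G bit is the least significant bit of the first octet.
         */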
5234         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5235                 return (FALSE);
5236         }
5237
5238         return (TRUE);
5239 }
5240
5241 /*
5242 ** Parse the interface capabilities with regard
5243 ** to both system management and wake-on-lan for
5244 ** later use.
5245 */
5246 static void
5247 em_get_wakeup(device_t dev)
5248 {
5249         struct adapter  *adapter = device_get_softc(dev);
5250         u16             eeprom_data = 0, device_id, apme_mask;
5251
5252         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5253         apme_mask = EM_EEPROM_APME;
5254
5255         switch (adapter->hw.mac.type) {
5256         case e1000_82573:
5257         case e1000_82583:
5258                 adapter->has_amt = TRUE;
5259                 /* Falls thru */
5260         case e1000_82571:
5261         case e1000_82572:
5262         case e1000_80003es2lan:
5263                 if (adapter->hw.bus.func == 1) {
5264                         e1000_read_nvm(&adapter->hw,
5265                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5266                         break;
5267                 } else
5268                         e1000_read_nvm(&adapter->hw,
5269                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5270                 break;
5271         case e1000_ich8lan:
5272         case e1000_ich9lan:
5273         case e1000_ich10lan:
5274         case e1000_pchlan:
5275         case e1000_pch2lan:
5276                 apme_mask = E1000_WUC_APME;
5277                 adapter->has_amt = TRUE;
5278                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5279                 break;
5280         default:
5281                 e1000_read_nvm(&adapter->hw,
5282                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5283                 break;
5284         }
5285         if (eeprom_data & apme_mask)
5286                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5287         /*
5288          * We have the eeprom settings, now apply the special cases
5289          * where the eeprom may be wrong or the board won't support
5290          * wake on lan on a particular port
5291          */
5292         device_id = pci_get_device(dev);
5293         switch (device_id) {
5294         case E1000_DEV_ID_82571EB_FIBER:
5295                 /* Wake events only supported on port A for dual fiber
5296                  * regardless of eeprom setting */
5297                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5298                     E1000_STATUS_FUNC_1)
5299                         adapter->wol = 0;
5300                 break;
5301         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5302         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5303         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5304                 /* if quad port adapter, disable WoL on all but port A */
5305                 if (global_quad_port_a != 0)
5306                         adapter->wol = 0;
5307                 /* Reset for multiple quad port adapters */
5308                 if (++global_quad_port_a == 4)
5309                         global_quad_port_a = 0;
5310                 break;
5311         }
5312         return;
5313 }
5314
5315
5316 /*
5317  * Enable PCI Wake On Lan capability
5318  */
5319 static void
5320 em_enable_wakeup(device_t dev)
5321 {
5322         struct adapter  *adapter = device_get_softc(dev);
5323         if_t ifp = adapter->ifp;
5324         u32             pmc, ctrl, ctrl_ext, rctl;
5325         u16             status;
5326
        if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5328                 return;
5329
5330         /* Advertise the wakeup capability */
5331         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5332         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5333         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5334         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5335
5336         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5337             (adapter->hw.mac.type == e1000_pchlan) ||
5338             (adapter->hw.mac.type == e1000_ich9lan) ||
5339             (adapter->hw.mac.type == e1000_ich10lan))
5340                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5341
5342         /* Keep the laser running on Fiber adapters */
5343         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5344             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5345                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5346                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5347                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5348         }
5349
5350         /*
5351         ** Determine type of Wakeup: note that wol
5352         ** is set with all bits on by default.
5353         */
5354         if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5355                 adapter->wol &= ~E1000_WUFC_MAG;
5356
5357         if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5358                 adapter->wol &= ~E1000_WUFC_MC;
5359         else {
5360                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5361                 rctl |= E1000_RCTL_MPE;
5362                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5363         }
5364
5365         if ((adapter->hw.mac.type == e1000_pchlan) ||
5366             (adapter->hw.mac.type == e1000_pch2lan)) {
5367                 if (em_enable_phy_wakeup(adapter))
5368                         return;
5369         } else {
5370                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5371                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5372         }
5373
5374         if (adapter->hw.phy.type == e1000_phy_igp_3)
5375                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5376
5377         /* Request PME */
5378         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5379         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5380         if (if_getcapenable(ifp) & IFCAP_WOL)
5381                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5382         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5383
5384         return;
5385 }
5386
5387 /*
5388 ** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the PHY
5390 */
5391 static int
5392 em_enable_phy_wakeup(struct adapter *adapter)
5393 {
5394         struct e1000_hw *hw = &adapter->hw;
5395         u32 mreg, ret = 0;
5396         u16 preg;
5397
5398         /* copy MAC RARs to PHY RARs */
5399         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5400
5401         /* copy MAC MTA to PHY MTA */
5402         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5403                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5404                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5405                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5406                     (u16)((mreg >> 16) & 0xFFFF));
5407         }
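        /*
         * Each 32-bit MTA entry is split across two 16-bit PHY
         * registers: BM_MTA(i) holds the low word, BM_MTA(i) + 1
         * the high word.
         */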
5408
5409         /* configure PHY Rx Control register */
5410         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5411         mreg = E1000_READ_REG(hw, E1000_RCTL);
5412         if (mreg & E1000_RCTL_UPE)
5413                 preg |= BM_RCTL_UPE;
5414         if (mreg & E1000_RCTL_MPE)
5415                 preg |= BM_RCTL_MPE;
5416         preg &= ~(BM_RCTL_MO_MASK);
5417         if (mreg & E1000_RCTL_MO_3)
5418                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5419                                 << BM_RCTL_MO_SHIFT);
5420         if (mreg & E1000_RCTL_BAM)
5421                 preg |= BM_RCTL_BAM;
5422         if (mreg & E1000_RCTL_PMCF)
5423                 preg |= BM_RCTL_PMCF;
5424         mreg = E1000_READ_REG(hw, E1000_CTRL);
5425         if (mreg & E1000_CTRL_RFCE)
5426                 preg |= BM_RCTL_RFCE;
5427         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5428
5429         /* enable PHY wakeup in MAC register */
5430         E1000_WRITE_REG(hw, E1000_WUC,
5431             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5432         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5433
5434         /* configure and enable PHY wakeup in PHY registers */
5435         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5436         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5437
5438         /* activate PHY wakeup */
5439         ret = hw->phy.ops.acquire(hw);
5440         if (ret) {
5441                 printf("Could not acquire PHY\n");
5442                 return ret;
5443         }
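        /*
         * Select PHY page 769 (the BM wakeup-control page) so that the
         * wakeup enable register can be accessed directly over MDIC.
         */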
5444         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5445                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5446         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5447         if (ret) {
5448                 printf("Could not read PHY page 769\n");
5449                 goto out;
5450         }
5451         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5452         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5453         if (ret)
5454                 printf("Could not set PHY Host Wakeup bit\n");
5455 out:
5456         hw->phy.ops.release(hw);
5457
5458         return ret;
5459 }
5460
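/*
** led(4) on/off callback, presumably registered with led_create() at
** attach time, used to drive the adapter's identification LED.
*/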
5461 static void
5462 em_led_func(void *arg, int onoff)
5463 {
5464         struct adapter  *adapter = arg;
5465  
5466         EM_CORE_LOCK(adapter);
5467         if (onoff) {
5468                 e1000_setup_led(&adapter->hw);
5469                 e1000_led_on(&adapter->hw);
5470         } else {
5471                 e1000_led_off(&adapter->hw);
5472                 e1000_cleanup_led(&adapter->hw);
5473         }
5474         EM_CORE_UNLOCK(adapter);
5475 }
5476
5477 /*
5478 ** Disable the ASPM L0s and L1 link states
5479 */
5480 static void
5481 em_disable_aspm(struct adapter *adapter)
5482 {
5483         int             base, reg;
5484         u16             link_cap, link_ctrl;
5485         device_t        dev = adapter->dev;
5486
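        /* Only the 82573/82574/82583 reportedly misbehave with ASPM enabled. */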
5487         switch (adapter->hw.mac.type) {
5488                 case e1000_82573:
5489                 case e1000_82574:
5490                 case e1000_82583:
5491                         break;
5492                 default:
5493                         return;
5494         }
5495         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5496                 return;
5497         reg = base + PCIER_LINK_CAP;
5498         link_cap = pci_read_config(dev, reg, 2);
5499         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5500                 return;
5501         reg = base + PCIER_LINK_CTL;
5502         link_ctrl = pci_read_config(dev, reg, 2);
5503         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5504         pci_write_config(dev, reg, link_ctrl, 2);
5505         return;
5506 }
5507
5508 /**********************************************************************
5509  *
5510  *  Update the board statistics counters.
5511  *
5512  **********************************************************************/
5513 static void
5514 em_update_stats_counters(struct adapter *adapter)
5515 {
5516
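        /*
         * Symbol and sequence errors are sampled only on copper links,
         * or on fiber/serdes once the link is up; on a downed serdes
         * link these counters are not meaningful.
         */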
5517         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5518            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5519                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5520                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5521         }
5522         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5523         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5524         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5525         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5526
5527         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5528         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5529         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5530         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5531         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5532         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5533         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5534         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5535         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5536         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5537         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5538         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5539         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5540         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5541         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5542         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5543         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5544         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5545         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5546         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5547
5548         /* For the 64-bit byte counters the low dword must be read first. */
5549         /* Both registers clear on the read of the high dword */
5550
5551         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5552             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5553         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5554             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5555
5556         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5557         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5558         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5559         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5560         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5561
5562         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5563         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5564
5565         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5566         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5567         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5568         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5569         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5570         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5571         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5572         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5573         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5574         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5575
5576         /* Interrupt Counts */
5577
5578         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5579         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5580         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5581         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5582         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5583         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5584         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5585         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5586         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5587
5588         if (adapter->hw.mac.type >= e1000_82543) {
5589                 adapter->stats.algnerrc +=
5590                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5591                 adapter->stats.rxerrc +=
5592                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5593                 adapter->stats.tncrs +=
5594                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5595                 adapter->stats.cexterr +=
5596                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5597                 adapter->stats.tsctc +=
5598                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5599                 adapter->stats.tsctfc +=
5600                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5601         }
5602 }
5603
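/*
** if_get_counter method: report interface counters derived from the
** hardware statistics accumulated above.
*/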
5604 static uint64_t
5605 em_get_counter(if_t ifp, ift_counter cnt)
5606 {
5607         struct adapter *adapter;
5608
5609         adapter = if_getsoftc(ifp);
5610
5611         switch (cnt) {
5612         case IFCOUNTER_COLLISIONS:
5613                 return (adapter->stats.colc);
5614         case IFCOUNTER_IERRORS:
5615                 return (adapter->dropped_pkts + adapter->stats.rxerrc +
5616                     adapter->stats.crcerrs + adapter->stats.algnerrc +
5617                     adapter->stats.ruc + adapter->stats.roc +
5618                     adapter->stats.mpc + adapter->stats.cexterr);
5619         case IFCOUNTER_OERRORS:
5620                 return (adapter->stats.ecol + adapter->stats.latecol +
5621                     adapter->watchdog_events);
5622         default:
5623                 return (if_get_counter_default(ifp, cnt));
5624         }
5625 }
5626
5627 /* Export a single 32-bit register via a read-only sysctl. */
5628 static int
5629 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5630 {
5631         struct adapter *adapter;
5632         u_int val;
5633
5634         adapter = oidp->oid_arg1;
5635         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5636         return (sysctl_handle_int(oidp, &val, 0, req));
5637 }
5638
5639 /*
5640  * Add sysctl variables, one per statistic, to the system.
5641  */
5642 static void
5643 em_add_hw_stats(struct adapter *adapter)
5644 {
5645         device_t dev = adapter->dev;
5646
5647         struct tx_ring *txr = adapter->tx_rings;
5648         struct rx_ring *rxr = adapter->rx_rings;
5649
5650         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5651         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5652         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5653         struct e1000_hw_stats *stats = &adapter->stats;
5654
5655         struct sysctl_oid *stat_node, *queue_node, *int_node;
5656         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5657
5658 #define QUEUE_NAME_LEN 32
5659         char namebuf[QUEUE_NAME_LEN];
5660         
5661         /* Driver Statistics */
5662         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5663                         CTLFLAG_RD, &adapter->dropped_pkts,
5664                         "Driver dropped packets");
5665         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5666                         CTLFLAG_RD, &adapter->link_irq,
5667                         "Link MSIX IRQ Handled");
5668         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
5669                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5670                          "Defragmenting mbuf chain failed");
5671         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5672                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5673                         "Driver tx dma failure in xmit");
5674         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5675                         CTLFLAG_RD, &adapter->rx_overruns,
5676                         "RX overruns");
5677         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5678                         CTLFLAG_RD, &adapter->watchdog_events,
5679                         "Watchdog timeouts");
5680         
5681         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5682                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5683                         em_sysctl_reg_handler, "IU",
5684                         "Device Control Register");
5685         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5686                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5687                         em_sysctl_reg_handler, "IU",
5688                         "Receiver Control Register");
5689         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5690                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5691                         "Flow Control High Watermark");
5692         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5693                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5694                         "Flow Control Low Watermark");
5695
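        /* Per-queue TX/RX statistics: one sysctl node per ring. */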
5696         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5697                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5698                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5699                                             CTLFLAG_RD, NULL, "TX Queue Name");
5700                 queue_list = SYSCTL_CHILDREN(queue_node);
5701
5702                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5703                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5704                                 E1000_TDH(txr->me),
5705                                 em_sysctl_reg_handler, "IU",
5706                                 "Transmit Descriptor Head");
5707                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5708                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5709                                 E1000_TDT(txr->me),
5710                                 em_sysctl_reg_handler, "IU",
5711                                 "Transmit Descriptor Tail");
5712                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5713                                 CTLFLAG_RD, &txr->tx_irq,
5714                                 "Queue MSI-X Transmit Interrupts");
5715                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5716                                 CTLFLAG_RD, &txr->no_desc_avail,
5717                                 "Queue No Descriptor Available");
5718
5719                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5720                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5721                                             CTLFLAG_RD, NULL, "RX Queue Name");
5722                 queue_list = SYSCTL_CHILDREN(queue_node);
5723
5724                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5725                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5726                                 E1000_RDH(rxr->me),
5727                                 em_sysctl_reg_handler, "IU",
5728                                 "Receive Descriptor Head");
5729                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5730                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5731                                 E1000_RDT(rxr->me),
5732                                 em_sysctl_reg_handler, "IU",
5733                                 "Receive Descriptor Tail");
5734                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5735                                 CTLFLAG_RD, &rxr->rx_irq,
5736                                 "Queue MSI-X Receive Interrupts");
5737         }
5738
5739         /* MAC stats get their own sub node */
5740
5741         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5742                                     CTLFLAG_RD, NULL, "Statistics");
5743         stat_list = SYSCTL_CHILDREN(stat_node);
5744
5745         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5746                         CTLFLAG_RD, &stats->ecol,
5747                         "Excessive collisions");
5748         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5749                         CTLFLAG_RD, &stats->scc,
5750                         "Single collisions");
5751         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5752                         CTLFLAG_RD, &stats->mcc,
5753                         "Multiple collisions");
5754         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5755                         CTLFLAG_RD, &stats->latecol,
5756                         "Late collisions");
5757         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5758                         CTLFLAG_RD, &stats->colc,
5759                         "Collision Count");
5760         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5761                         CTLFLAG_RD, &adapter->stats.symerrs,
5762                         "Symbol Errors");
5763         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5764                         CTLFLAG_RD, &adapter->stats.sec,
5765                         "Sequence Errors");
5766         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5767                         CTLFLAG_RD, &adapter->stats.dc,
5768                         "Defer Count");
5769         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5770                         CTLFLAG_RD, &adapter->stats.mpc,
5771                         "Missed Packets");
5772         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5773                         CTLFLAG_RD, &adapter->stats.rnbc,
5774                         "Receive No Buffers");
5775         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5776                         CTLFLAG_RD, &adapter->stats.ruc,
5777                         "Receive Undersize");
5778         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5779                         CTLFLAG_RD, &adapter->stats.rfc,
5780                         "Fragmented Packets Received");
5781         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5782                         CTLFLAG_RD, &adapter->stats.roc,
5783                         "Oversized Packets Received");
5784         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5785                         CTLFLAG_RD, &adapter->stats.rjc,
5786                         "Received Jabber");
5787         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5788                         CTLFLAG_RD, &adapter->stats.rxerrc,
5789                         "Receive Errors");
5790         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5791                         CTLFLAG_RD, &adapter->stats.crcerrs,
5792                         "CRC errors");
5793         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5794                         CTLFLAG_RD, &adapter->stats.algnerrc,
5795                         "Alignment Errors");
5796         /* On 82575 these are collision counts */
5797         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5798                         CTLFLAG_RD, &adapter->stats.cexterr,
5799                         "Collision/Carrier extension errors");
5800         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5801                         CTLFLAG_RD, &adapter->stats.xonrxc,
5802                         "XON Received");
5803         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5804                         CTLFLAG_RD, &adapter->stats.xontxc,
5805                         "XON Transmitted");
5806         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5807                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5808                         "XOFF Received");
5809         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5810                         CTLFLAG_RD, &adapter->stats.xofftxc,
5811                         "XOFF Transmitted");
5812
5813         /* Packet Reception Stats */
5814         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5815                         CTLFLAG_RD, &adapter->stats.tpr,
5816                         "Total Packets Received");
5817         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5818                         CTLFLAG_RD, &adapter->stats.gprc,
5819                         "Good Packets Received");
5820         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5821                         CTLFLAG_RD, &adapter->stats.bprc,
5822                         "Broadcast Packets Received");
5823         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5824                         CTLFLAG_RD, &adapter->stats.mprc,
5825                         "Multicast Packets Received");
5826         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5827                         CTLFLAG_RD, &adapter->stats.prc64,
5828                         "64 byte frames received");
5829         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5830                         CTLFLAG_RD, &adapter->stats.prc127,
5831                         "65-127 byte frames received");
5832         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5833                         CTLFLAG_RD, &adapter->stats.prc255,
5834                         "128-255 byte frames received");
5835         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5836                         CTLFLAG_RD, &adapter->stats.prc511,
5837                         "256-511 byte frames received");
5838         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5839                         CTLFLAG_RD, &adapter->stats.prc1023,
5840                         "512-1023 byte frames received");
5841         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5842                         CTLFLAG_RD, &adapter->stats.prc1522,
5843                         "1024-1522 byte frames received");
5844         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5845                         CTLFLAG_RD, &adapter->stats.gorc, 
5846                         "Good Octets Received"); 
5847
5848         /* Packet Transmission Stats */
5849         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5850                         CTLFLAG_RD, &adapter->stats.gotc, 
5851                         "Good Octets Transmitted"); 
5852         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5853                         CTLFLAG_RD, &adapter->stats.tpt,
5854                         "Total Packets Transmitted");
5855         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5856                         CTLFLAG_RD, &adapter->stats.gptc,
5857                         "Good Packets Transmitted");
5858         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5859                         CTLFLAG_RD, &adapter->stats.bptc,
5860                         "Broadcast Packets Transmitted");
5861         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5862                         CTLFLAG_RD, &adapter->stats.mptc,
5863                         "Multicast Packets Transmitted");
5864         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5865                         CTLFLAG_RD, &adapter->stats.ptc64,
5866                         "64 byte frames transmitted");
5867         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5868                         CTLFLAG_RD, &adapter->stats.ptc127,
5869                         "65-127 byte frames transmitted");
5870         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5871                         CTLFLAG_RD, &adapter->stats.ptc255,
5872                         "128-255 byte frames transmitted");
5873         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5874                         CTLFLAG_RD, &adapter->stats.ptc511,
5875                         "256-511 byte frames transmitted");
5876         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5877                         CTLFLAG_RD, &adapter->stats.ptc1023,
5878                         "512-1023 byte frames transmitted");
5879         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5880                         CTLFLAG_RD, &adapter->stats.ptc1522,
5881                         "1024-1522 byte frames transmitted");
5882         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5883                         CTLFLAG_RD, &adapter->stats.tsctc,
5884                         "TSO Contexts Transmitted");
5885         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5886                         CTLFLAG_RD, &adapter->stats.tsctfc,
5887                         "TSO Contexts Failed");
5888
5889
5890         /* Interrupt Stats */
5891
5892         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5893                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5894         int_list = SYSCTL_CHILDREN(int_node);
5895
5896         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5897                         CTLFLAG_RD, &adapter->stats.iac,
5898                         "Interrupt Assertion Count");
5899
5900         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5901                         CTLFLAG_RD, &adapter->stats.icrxptc,
5902                         "Interrupt Cause Rx Pkt Timer Expire Count");
5903
5904         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5905                         CTLFLAG_RD, &adapter->stats.icrxatc,
5906                         "Interrupt Cause Rx Abs Timer Expire Count");
5907
5908         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5909                         CTLFLAG_RD, &adapter->stats.ictxptc,
5910                         "Interrupt Cause Tx Pkt Timer Expire Count");
5911
5912         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5913                         CTLFLAG_RD, &adapter->stats.ictxatc,
5914                         "Interrupt Cause Tx Abs Timer Expire Count");
5915
5916         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5917                         CTLFLAG_RD, &adapter->stats.ictxqec,
5918                         "Interrupt Cause Tx Queue Empty Count");
5919
5920         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5921                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5922                         "Interrupt Cause Tx Queue Min Thresh Count");
5923
5924         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5925                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5926                         "Interrupt Cause Rx Desc Min Thresh Count");
5927
5928         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5929                         CTLFLAG_RD, &adapter->stats.icrxoc,
5930                         "Interrupt Cause Receiver Overrun Count");
5931 }
5932
5933 /**********************************************************************
5934  *
5935  *  This routine provides a way to dump out the adapter eeprom,
5936  *  often a useful debug/service tool. This dumps only the first
5937  *  32 words; the data that matters lives within that extent.
5938  *
5939  **********************************************************************/
5940 static int
5941 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5942 {
5943         struct adapter *adapter = (struct adapter *)arg1;
5944         int error;
5945         int result;
5946
5947         result = -1;
5948         error = sysctl_handle_int(oidp, &result, 0, req);
5949
5950         if (error || !req->newptr)
5951                 return (error);
5952
5953         /*
5954          * This value will cause a hex dump of the
5955          * first 32 16-bit words of the EEPROM to
5956          * the screen.
5957          */
5958         if (result == 1)
5959                 em_print_nvm_info(adapter);
5960
5961         return (error);
5962 }
5963
5964 static void
5965 em_print_nvm_info(struct adapter *adapter)
5966 {
5967         u16     eeprom_data;
5968         int     i, j, row = 0;
5969
5970         /* It's a bit crude, but it gets the job done. */
5971         printf("\nInterface EEPROM Dump:\n");
5972         printf("Offset\n0x0000  ");
5973         for (i = 0, j = 0; i < 32; i++, j++) {
5974                 if (j == 8) { /* Make the offset block */
5975                         j = 0; ++row;
5976                         printf("\n0x00%x0  ", row);
5977                 }
5978                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5979                 printf("%04x ", eeprom_data);
5980         }
5981         printf("\n");
5982 }
5983
5984 static int
5985 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5986 {
5987         struct em_int_delay_info *info;
5988         struct adapter *adapter;
5989         u32 regval;
5990         int error, usecs, ticks;
5991
5992         info = (struct em_int_delay_info *)arg1;
5993         usecs = info->value;
5994         error = sysctl_handle_int(oidp, &usecs, 0, req);
5995         if (error != 0 || req->newptr == NULL)
5996                 return (error);
5997         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5998                 return (EINVAL);
5999         info->value = usecs;
6000         ticks = EM_USECS_TO_TICKS(usecs);
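        /* EM_USECS_TO_TICKS() ticks are 1.024us; scale by 4 for 256ns ITR. */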
6001         if (info->offset == E1000_ITR)  /* units are 256ns here */
6002                 ticks *= 4;
6003
6004         adapter = info->adapter;
6005         
6006         EM_CORE_LOCK(adapter);
6007         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6008         regval = (regval & ~0xffff) | (ticks & 0xffff);
6009         /* Handle a few special cases. */
6010         switch (info->offset) {
6011         case E1000_RDTR:
6012                 break;
6013         case E1000_TIDV:
6014                 if (ticks == 0) {
6015                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6016                         /* Don't write 0 into the TIDV register. */
6017                         regval++;
6018                 } else
6019                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6020                 break;
6021         }
6022         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6023         EM_CORE_UNLOCK(adapter);
6024         return (0);
6025 }
6026
6027 static void
6028 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6029         const char *description, struct em_int_delay_info *info,
6030         int offset, int value)
6031 {
6032         info->adapter = adapter;
6033         info->offset = offset;
6034         info->value = value;
6035         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6036             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6037             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6038             info, 0, em_sysctl_int_delay, "I", description);
6039 }
6040
6041 static void
6042 em_set_sysctl_value(struct adapter *adapter, const char *name,
6043         const char *description, int *limit, int value)
6044 {
6045         *limit = value;
6046         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6047             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6048             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6049 }
6050
6051
6052 /*
6053 ** Set flow control using sysctl:
6054 ** Flow control values:
6055 **      0 - off
6056 **      1 - rx pause
6057 **      2 - tx pause
6058 **      3 - full
6059 */
6060 static int
6061 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6062 {       
6063         int             error;
6064         static int      input = 3; /* default is full */
6065         struct adapter  *adapter = (struct adapter *) arg1;
6066                     
6067         error = sysctl_handle_int(oidp, &input, 0, req);
6068     
6069         if ((error) || (req->newptr == NULL))
6070                 return (error);
6071                 
6072         if (input == adapter->fc) /* no change? */
6073                 return (error);
6074
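        /* These values map directly onto the e1000_fc_mode enumeration. */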
6075         switch (input) {
6076                 case e1000_fc_rx_pause:
6077                 case e1000_fc_tx_pause:
6078                 case e1000_fc_full:
6079                 case e1000_fc_none:
6080                         adapter->hw.fc.requested_mode = input;
6081                         adapter->fc = input;
6082                         break;
6083                 default:
6084                         /* Do nothing */
6085                         return (error);
6086         }
6087
6088         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6089         e1000_force_mac_fc(&adapter->hw);
6090         return (error);
6091 }
6092
6093 /*
6094 ** Manage Energy Efficient Ethernet:
6095 ** Control values:
6096 **     0/1 - enabled/disabled
6097 */
6098 static int
6099 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6100 {
6101         struct adapter *adapter = (struct adapter *)arg1;
6102         int             error, value;
6103
6104         value = adapter->hw.dev_spec.ich8lan.eee_disable;
6105         error = sysctl_handle_int(oidp, &value, 0, req);
6106         if (error || req->newptr == NULL)
6107                 return (error);
6108         EM_CORE_LOCK(adapter);
6109         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6110         em_init_locked(adapter);
6111         EM_CORE_UNLOCK(adapter);
6112         return (0);
6113 }
6114
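/*
** Writing 1 to this sysctl triggers a dump of adapter debug state
** to the console (see em_print_debug_info() below).
*/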
6115 static int
6116 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6117 {
6118         struct adapter *adapter;
6119         int error;
6120         int result;
6121
6122         result = -1;
6123         error = sysctl_handle_int(oidp, &result, 0, req);
6124
6125         if (error || !req->newptr)
6126                 return (error);
6127
6128         if (result == 1) {
6129                 adapter = (struct adapter *)arg1;
6130                 em_print_debug_info(adapter);
6131         }
6132
6133         return (error);
6134 }
6135
6136 /*
6137 ** This routine is meant to be fluid, add whatever is
6138 ** needed for debugging a problem.  -jfv
6139 */
6140 static void
6141 em_print_debug_info(struct adapter *adapter)
6142 {
6143         device_t dev = adapter->dev;
6144         struct tx_ring *txr = adapter->tx_rings;
6145         struct rx_ring *rxr = adapter->rx_rings;
6146
6147         if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6148                 printf("Interface is RUNNING ");
6149         else
6150                 printf("Interface is NOT RUNNING\n");
6151
6152         if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6153                 printf("and INACTIVE\n");
6154         else
6155                 printf("and ACTIVE\n");
6156
6157         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6158                 device_printf(dev, "TX Queue %d ------\n", i);
6159                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6160                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6161                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6162                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6163                 device_printf(dev, "TX descriptors avail = %d\n",
6164                         txr->tx_avail);
6165                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6166                         txr->no_desc_avail);
6167                 device_printf(dev, "RX Queue %d ------\n", i);
6168                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6169                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6170                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6171                 device_printf(dev, "RX discarded packets = %ld\n",
6172                         rxr->rx_discarded);
6173                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6174                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6175         }
6176 }
6177
6178 #ifdef EM_MULTIQUEUE
6179 /*
6180  * 82574 only:
6181  * Write a new value to the EEPROM increasing the number of MSIX
6182  * vectors from 3 to 5, for proper multiqueue support.
6183  */
6184 static void
6185 em_enable_vectors_82574(struct adapter *adapter)
6186 {
6187         struct e1000_hw *hw = &adapter->hw;
6188         device_t dev = adapter->dev;
6189         u16 edata;
6190
6191         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6192         device_printf(dev, "Current cap: %#06x\n", edata);
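        /* The NVM field appears to encode (vector count - 1), so 4 => 5. */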
6193         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6194                 device_printf(dev, "Writing to eeprom: increasing "
6195                     "reported MSIX vectors from 3 to 5...\n");
6196                 edata &= ~(EM_NVM_MSIX_N_MASK);
6197                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6198                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6199                 e1000_update_nvm_checksum(hw);
6200                 device_printf(dev, "Writing to eeprom: done\n");
6201         }
6202 }
6203 #endif
6204
6205 #ifdef DDB
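/*
** DDB convenience command: reinitialize every em(4) device
** in the system from the kernel debugger.
*/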
6206 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6207 {
6208         devclass_t      dc;
6209         int max_em;
6210
6211         dc = devclass_find("em");
6212         max_em = devclass_get_maxunit(dc);
6213
6214         for (int index = 0; index < max_em; index++) {
6215                 device_t dev;
6216                 dev = devclass_get_device(dc, index);
6217                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6218                         struct adapter *adapter = device_get_softc(dev);
6219                         EM_CORE_LOCK(adapter);
6220                         em_init_locked(adapter);
6221                         EM_CORE_UNLOCK(adapter);
6222                 }
6223         }
6224 }
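/*
** DDB convenience command: dump debug state for every em(4)
** device in the system.
*/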
6225 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6226 {
6227         devclass_t      dc;
6228         int max_em;
6229
6230         dc = devclass_find("em");
6231         max_em = devclass_get_maxunit(dc);
6232
6233         for (int index = 0; index < max_em; index++) {
6234                 device_t dev;
6235                 dev = devclass_get_device(dc, index);
6236                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6237                         em_print_debug_info(device_get_softc(dev));
6238         }
6239
6240 }
6241 #endif