/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to attach to
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(if_t, struct mbuf *);
static int      em_mq_start_locked(if_t,
                    struct tx_ring *);
static void     em_qflush(if_t);
#else
static void     em_start(if_t);
static void     em_start_locked(if_t, struct tx_ring *);
#endif
static int      em_ioctl(if_t, u_long, caddr_t);
static uint64_t em_get_counter(if_t, ift_counter);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(if_t, struct ifmediareq *);
static int      em_media_change(if_t);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);
static void     em_flush_desc_rings(struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_setup_rxdesc(union e1000_rx_desc_extended *,
                    const struct em_rxbuffer *rxbuf);
static void     em_receive_checksum(uint32_t status, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, if_t, u16);
static void     em_unregister_vlan(void *, if_t, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void     em_enable_vectors_82574(struct adapter *);
#endif

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

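/*
 * The e1000 interrupt-delay timers (TIDV/RDTR/TADV/RADV) count in
 * 1.024 usec units; these macros convert between those hardware
 * ticks and microseconds, rounding to the nearest unit.
 */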
#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)

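/*
 * The ITR register throttles interrupts in 256 nsec units, so a
 * target of MAX_INTS_PER_SEC interrupts/sec corresponds to an
 * interval of 10^9 / (MAX_INTS_PER_SEC * 256) register units.
 */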
#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))

#define TSO_WORKAROUND  4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** when a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should attach to this
 *  adapter, based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
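        /*
         * Scan the table until the all-zero terminator; PCI_ANY_ID
         * in the subvendor/subdevice fields acts as a wildcard.
         */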
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }
        /*
        ** In the newer SPT devices the flash is not a separate BAR;
        ** rather, it lives within BAR0, so use the same tag and an
        ** offset handle for the FLASH read/write macros in the
        ** shared code.
        */
        else if (hw->mac.type == e1000_pch_spt) {
                adapter->osdep.flash_bus_space_tag =
                    adapter->osdep.mem_bus_space_tag;
                adapter->osdep.flash_bus_space_handle =
                    adapter->osdep.mem_bus_space_handle
                    + E1000_FLASH_BASE_ADDR;
        }

        /* Do Shared Code initialization */
        error = e1000_setup_init_funcs(hw, TRUE);
        if (error) {
                device_printf(dev, "Setup of Shared code failed, error %d\n",
                    error);
                error = ENXIO;
                goto err_pci;
        }

        /*
         * Setup MSI/X or MSI if PCI Express
         */
        adapter->msix = em_setup_msix(adapter);

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate number of transmit and receive descriptors. It
         * must not exceed hardware maximum, and must be multiple
         * of E1000_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is important for
        ** reading the NVM and the MAC address correctly.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);

        /*
        ** Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != (void *)NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        if_t ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (if_vlantrunkinuse(ifp)) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (if_getcapenable(ifp) & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        if_t ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((if_getflags(ifp) & IFF_UP) &&
            (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr);
#else
                        if (!if_sendq_empty(ifp))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!if_sendq_empty(ifp)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
                        break;
                }
                m_head = if_dequeue(ifp);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        if_sendq_prepend(ifp, m_head);
                        break;
                }

                /* Mark the queue as having work */
                if (txr->busy == EM_TX_IDLE)
                        txr->busy = EM_TX_BUSY;

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

        }

        return;
}

static void
em_start(if_t ifp)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;

        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than doing an immediate send.  It is this queueing,
 *  rather than merely having multiple TX queues, that is the
 *  advantage in this driver.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;
        unsigned int    i, error;

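        /*
         * Select the ring: use the flow hash supplied by the stack
         * when present, otherwise fall back to the current CPU, in
         * both cases modulo the number of configured queues.
         */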
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];

        error = drbr_enqueue(ifp, txr->br, m);
        if (error)
                return (error);

        if (EM_TX_TRYLOCK(txr)) {
                em_mq_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        } else
                taskqueue_enqueue(txr->tq, &txr->tx_task);

        return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        EM_TX_LOCK_ASSERT(txr);

        if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
            adapter->link_active == 0) {
                return (ENETDOWN);
        }

        /* Process the queue */
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL) {
                                /* It was freed, move forward */
                                drbr_advance(ifp, txr->br);
                        } else {
                                /*
                                 * Still have one left, it may not be
                                 * the same since the transmit function
                                 * may have changed it.
                                 */
                                drbr_putback(ifp, txr->br, next);
                        }
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
                if (next->m_flags & M_MCAST)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
                ETHER_BPF_MTAP(ifp, next);
                if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
                        break;
        }

        /* Mark the queue as having work */
        if ((enq > 0) && (txr->busy == EM_TX_IDLE))
                txr->busy = EM_TX_BUSY;

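        /*
         * If descriptors are scarce, reclaim completed ones; if the
         * ring is still nearly full afterwards, set OACTIVE so the
         * stack backs off until space frees up.
         */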
        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER) {
                if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = if_getsoftc(ifp);
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        if_setflagbits(ifp,IFF_UP,0);
                        if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(if_getflags(ifp) & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_pch_spt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                if_setmtu(ifp, ifr->ifr_mtu);
                adapter->hw.mac.max_frame_size =
                    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
                if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
                        em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (if_getflags(ifp) & IFF_UP) {
                        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                                if ((if_getflags(ifp) ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = if_getflags(ifp);
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
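                /* mask holds just the capability bits this request toggles */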
                mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                if_setcapenablebit(ifp, IFCAP_POLLING, 0);
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                if_setcapenablebit(ifp, 0, IFCAP_POLLING);
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        if_togglecapenable(ifp,IFCAP_HWCSUM);
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        if_togglecapenable(ifp,IFCAP_TSO4);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                if_togglecapenable(ifp, IFCAP_WOL_MCAST);
                        if (mask & IFCAP_WOL_MAGIC)
                                if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
                }
                if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
                        em_init(adapter);
                if_vlancap(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/
1350
1351 static void
1352 em_init_locked(struct adapter *adapter)
1353 {
1354         if_t ifp = adapter->ifp;
1355         device_t        dev = adapter->dev;
1356
1357         INIT_DEBUGOUT("em_init: begin");
1358
1359         EM_CORE_LOCK_ASSERT(adapter);
1360
1361         em_disable_intr(adapter);
1362         callout_stop(&adapter->timer);
1363
1364         /* Get the latest MAC address; the user may have set an LAA */
1365         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1366               ETHER_ADDR_LEN);
1367
1368         /* Put the address into the Receive Address Array */
1369         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1370
1371         /*
1372          * With the 82571 adapter, RAR[0] may be overwritten
1373          * when the other port is reset, so we keep a duplicate
1374          * in the last RAR entry (RAR[14]); this ensures the
1375          * interface continues to function.
1376          */
1377         if (adapter->hw.mac.type == e1000_82571) {
1378                 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1379                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1380                     E1000_RAR_ENTRIES - 1);
1381         }
1382
1383         /* Initialize the hardware */
1384         em_reset(adapter);
1385         em_update_link_status(adapter);
1386
1387         /* Setup VLAN support, basic and offload if available */
1388         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1389
1390         /* Set hardware offload abilities */
1391         if_clearhwassist(ifp);
1392         if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1393                 if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1394
1395         if (if_getcapenable(ifp) & IFCAP_TSO4)
1396                 if_sethwassistbits(ifp, CSUM_TSO, 0);
1397
1398         /* Configure for OS presence */
1399         em_init_manageability(adapter);
1400
1401         /* Prepare transmit descriptors and buffers */
1402         em_setup_transmit_structures(adapter);
1403         em_initialize_transmit_unit(adapter);
1404
1405         /* Setup Multicast table */
1406         em_set_multi(adapter);
1407
1408         /*
1409         ** Select the receive mbuf cluster size
1410         ** needed for the maximum frame size (jumbos)
1411         */
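        /*
        ** For reference: MCLBYTES is a standard 2K cluster,
        ** MJUMPAGESIZE is one page (typically 4K), and
        ** MJUM9BYTES is a 9K cluster, enough for a jumbo frame.
        */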
1412         if (adapter->hw.mac.max_frame_size <= 2048)
1413                 adapter->rx_mbuf_sz = MCLBYTES;
1414         else if (adapter->hw.mac.max_frame_size <= 4096)
1415                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1416         else
1417                 adapter->rx_mbuf_sz = MJUM9BYTES;
1418
1419         /* Prepare receive descriptors and buffers */
1420         if (em_setup_receive_structures(adapter)) {
1421                 device_printf(dev, "Could not setup receive structures\n");
1422                 em_stop(adapter);
1423                 return;
1424         }
1425         em_initialize_receive_unit(adapter);
1426
1427         /* Use real VLAN Filter support? */
1428         if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1429                 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1430                         /* Program the hardware VLAN filter table */
1431                         em_setup_vlan_hw_support(adapter);
1432                 else {
1433                         u32 ctrl;
1434                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1435                         ctrl |= E1000_CTRL_VME;
1436                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1437                 }
1438         }
1439
1440         /* Don't lose promiscuous settings */
1441         em_set_promisc(adapter);
1442
1443         /* Set the interface as ACTIVE */
1444         if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1445
1446         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1447         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1448
1449         /* MSI/X configuration for 82574 */
1450         if (adapter->hw.mac.type == e1000_82574) {
1451                 int tmp;
1452                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1453                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1454                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1455                 /* Set the IVAR - interrupt vector routing. */
1456                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1457         }
1458
1459 #ifdef DEVICE_POLLING
1460         /*
1461          * Only enable interrupts if we are not polling; otherwise
1462          * make sure they stay off.
1463          */
1464         if (if_getcapenable(ifp) & IFCAP_POLLING)
1465                 em_disable_intr(adapter);
1466         else
1467 #endif /* DEVICE_POLLING */
1468                 em_enable_intr(adapter);
1469
1470         /* AMT based hardware can now take control from firmware */
1471         if (adapter->has_manage && adapter->has_amt)
1472                 em_get_hw_control(adapter);
1473 }
1474
1475 static void
1476 em_init(void *arg)
1477 {
1478         struct adapter *adapter = arg;
1479
1480         EM_CORE_LOCK(adapter);
1481         em_init_locked(adapter);
1482         EM_CORE_UNLOCK(adapter);
1483 }
1484
1485
1486 #ifdef DEVICE_POLLING
1487 /*********************************************************************
1488  *
1489  *  Legacy polling routine: note this only works with single queue
1490  *
1491  *********************************************************************/
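/*
 * Note: under DEVICE_POLLING the "count" argument bounds the RX work
 * done per call, and the return value reports how many packets were
 * processed so the polling framework can adapt its load.
 */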
1492 static int
1493 em_poll(if_t ifp, enum poll_cmd cmd, int count)
1494 {
1495         struct adapter *adapter = if_getsoftc(ifp);
1496         struct tx_ring  *txr = adapter->tx_rings;
1497         struct rx_ring  *rxr = adapter->rx_rings;
1498         u32             reg_icr;
1499         int             rx_done;
1500
1501         EM_CORE_LOCK(adapter);
1502         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1503                 EM_CORE_UNLOCK(adapter);
1504                 return (0);
1505         }
1506
1507         if (cmd == POLL_AND_CHECK_STATUS) {
1508                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1509                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1510                         callout_stop(&adapter->timer);
1511                         adapter->hw.mac.get_link_status = 1;
1512                         em_update_link_status(adapter);
1513                         callout_reset(&adapter->timer, hz,
1514                             em_local_timer, adapter);
1515                 }
1516         }
1517         EM_CORE_UNLOCK(adapter);
1518
1519         em_rxeof(rxr, count, &rx_done);
1520
1521         EM_TX_LOCK(txr);
1522         em_txeof(txr);
1523 #ifdef EM_MULTIQUEUE
1524         if (!drbr_empty(ifp, txr->br))
1525                 em_mq_start_locked(ifp, txr);
1526 #else
1527         if (!if_sendq_empty(ifp))
1528                 em_start_locked(ifp, txr);
1529 #endif
1530         EM_TX_UNLOCK(txr);
1531
1532         return (rx_done);
1533 }
1534 #endif /* DEVICE_POLLING */
1535
1536
1537 /*********************************************************************
1538  *
1539  *  Fast Legacy/MSI Combined Interrupt Service routine  
1540  *
1541  *********************************************************************/
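/*
 * This runs as an interrupt filter and must not sleep: it only
 * classifies the interrupt (FILTER_STRAY vs. FILTER_HANDLED), masks
 * further interrupts, and defers the RX/TX work to the que taskqueue.
 */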
1542 static int
1543 em_irq_fast(void *arg)
1544 {
1545         struct adapter  *adapter = arg;
1546         if_t ifp;
1547         u32             reg_icr;
1548
1549         ifp = adapter->ifp;
1550
1551         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1552
1553         /* Hot eject?  */
1554         if (reg_icr == 0xffffffff)
1555                 return FILTER_STRAY;
1556
1557         /* Definitely not our interrupt.  */
1558         if (reg_icr == 0x0)
1559                 return FILTER_STRAY;
1560
1561         /*
1562          * Starting with the 82571 chip, bit 31 should be used to
1563          * determine whether the interrupt belongs to us.
1564          */
1565         if (adapter->hw.mac.type >= e1000_82571 &&
1566             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1567                 return FILTER_STRAY;
1568
1569         em_disable_intr(adapter);
1570         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1571
1572         /* Link status change */
1573         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1574                 adapter->hw.mac.get_link_status = 1;
1575                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1576         }
1577
1578         if (reg_icr & E1000_ICR_RXO)
1579                 adapter->rx_overruns++;
1580         return FILTER_HANDLED;
1581 }
1582
1583 /* Combined RX/TX handler, used by Legacy and MSI */
1584 static void
1585 em_handle_que(void *context, int pending)
1586 {
1587         struct adapter  *adapter = context;
1588         if_t ifp = adapter->ifp;
1589         struct tx_ring  *txr = adapter->tx_rings;
1590         struct rx_ring  *rxr = adapter->rx_rings;
1591
1592         if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1593                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1594
1595                 EM_TX_LOCK(txr);
1596                 em_txeof(txr);
1597 #ifdef EM_MULTIQUEUE
1598                 if (!drbr_empty(ifp, txr->br))
1599                         em_mq_start_locked(ifp, txr);
1600 #else
1601                 if (!if_sendq_empty(ifp))
1602                         em_start_locked(ifp, txr);
1603 #endif
1604                 EM_TX_UNLOCK(txr);
1605                 if (more) {
1606                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1607                         return;
1608                 }
1609         }
1610
1611         em_enable_intr(adapter);
1612         return;
1613 }
1614
1615
1616 /*********************************************************************
1617  *
1618  *  MSIX Interrupt Service Routines
1619  *
1620  **********************************************************************/
1621 static void
1622 em_msix_tx(void *arg)
1623 {
1624         struct tx_ring *txr = arg;
1625         struct adapter *adapter = txr->adapter;
1626         if_t ifp = adapter->ifp;
1627
1628         ++txr->tx_irq;
1629         EM_TX_LOCK(txr);
1630         em_txeof(txr);
1631 #ifdef EM_MULTIQUEUE
1632         if (!drbr_empty(ifp, txr->br))
1633                 em_mq_start_locked(ifp, txr);
1634 #else
1635         if (!if_sendq_empty(ifp))
1636                 em_start_locked(ifp, txr);
1637 #endif
1638
1639         /* Reenable this interrupt */
1640         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1641         EM_TX_UNLOCK(txr);
1642         return;
1643 }
1644
1645 /*********************************************************************
1646  *
1647  *  MSIX RX Interrupt Service routine
1648  *
1649  **********************************************************************/
1650
1651 static void
1652 em_msix_rx(void *arg)
1653 {
1654         struct rx_ring  *rxr = arg;
1655         struct adapter  *adapter = rxr->adapter;
1656         bool            more;
1657
1658         ++rxr->rx_irq;
1659         if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1660                 return;
1661         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1662         if (more)
1663                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1664         else {
1665                 /* Reenable this interrupt */
1666                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1667         }
1668         return;
1669 }
1670
1671 /*********************************************************************
1672  *
1673  *  MSIX Link Fast Interrupt Service routine
1674  *
1675  **********************************************************************/
1676 static void
1677 em_msix_link(void *arg)
1678 {
1679         struct adapter  *adapter = arg;
1680         u32             reg_icr;
1681
1682         ++adapter->link_irq;
1683         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1684
1685         if (reg_icr & E1000_ICR_RXO)
1686                 adapter->rx_overruns++;
1687
1688         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1689                 adapter->hw.mac.get_link_status = 1;
1690                 em_handle_link(adapter, 0);
1691         } else
1692                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1693                     EM_MSIX_LINK | E1000_IMS_LSC);
1694         /*
1695         ** Because we must read the ICR for this interrupt,
1696         ** the read may clear other causes via autoclear; to
1697         ** compensate we post a soft interrupt (ICS) for all
1698         ** of our vectors.
1699         */
1700         if (reg_icr) {
1701                 E1000_WRITE_REG(&adapter->hw,
1702                         E1000_ICS, adapter->ims);
1703         }
1704         return;
1705 }
1706
1707 static void
1708 em_handle_rx(void *context, int pending)
1709 {
1710         struct rx_ring  *rxr = context;
1711         struct adapter  *adapter = rxr->adapter;
1712         bool            more;
1713
1714         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1715         if (more)
1716                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1717         else {
1718                 /* Reenable this interrupt */
1719                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1720         }
1721 }
1722
1723 static void
1724 em_handle_tx(void *context, int pending)
1725 {
1726         struct tx_ring  *txr = context;
1727         struct adapter  *adapter = txr->adapter;
1728         if_t ifp = adapter->ifp;
1729
1730         EM_TX_LOCK(txr);
1731         em_txeof(txr);
1732 #ifdef EM_MULTIQUEUE
1733         if (!drbr_empty(ifp, txr->br))
1734                 em_mq_start_locked(ifp, txr);
1735 #else
1736         if (!if_sendq_empty(ifp))
1737                 em_start_locked(ifp, txr);
1738 #endif
1739         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1740         EM_TX_UNLOCK(txr);
1741 }
1742
1743 static void
1744 em_handle_link(void *context, int pending)
1745 {
1746         struct adapter  *adapter = context;
1747         struct tx_ring  *txr = adapter->tx_rings;
1748         if_t ifp = adapter->ifp;
1749
1750         if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1751                 return;
1752
1753         EM_CORE_LOCK(adapter);
1754         callout_stop(&adapter->timer);
1755         em_update_link_status(adapter);
1756         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1757         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1758             EM_MSIX_LINK | E1000_IMS_LSC);
1759         if (adapter->link_active) {
1760                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1761                         EM_TX_LOCK(txr);
1762 #ifdef EM_MULTIQUEUE
1763                         if (!drbr_empty(ifp, txr->br))
1764                                 em_mq_start_locked(ifp, txr);
1765 #else
1766                         if (!if_sendq_empty(ifp))
1767                                 em_start_locked(ifp, txr);
1768 #endif
1769                         EM_TX_UNLOCK(txr);
1770                 }
1771         }
1772         EM_CORE_UNLOCK(adapter);
1773 }
1774
1775
1776 /*********************************************************************
1777  *
1778  *  Media Ioctl callback
1779  *
1780  *  This routine is called whenever the user queries the status of
1781  *  the interface using ifconfig.
1782  *
1783  **********************************************************************/
1784 static void
1785 em_media_status(if_t ifp, struct ifmediareq *ifmr)
1786 {
1787         struct adapter *adapter = if_getsoftc(ifp);
1788         u_char fiber_type = IFM_1000_SX;
1789
1790         INIT_DEBUGOUT("em_media_status: begin");
1791
1792         EM_CORE_LOCK(adapter);
1793         em_update_link_status(adapter);
1794
1795         ifmr->ifm_status = IFM_AVALID;
1796         ifmr->ifm_active = IFM_ETHER;
1797
1798         if (!adapter->link_active) {
1799                 EM_CORE_UNLOCK(adapter);
1800                 return;
1801         }
1802
1803         ifmr->ifm_status |= IFM_ACTIVE;
1804
1805         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1806             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1807                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1808         } else {
1809                 switch (adapter->link_speed) {
1810                 case 10:
1811                         ifmr->ifm_active |= IFM_10_T;
1812                         break;
1813                 case 100:
1814                         ifmr->ifm_active |= IFM_100_TX;
1815                         break;
1816                 case 1000:
1817                         ifmr->ifm_active |= IFM_1000_T;
1818                         break;
1819                 }
1820                 if (adapter->link_duplex == FULL_DUPLEX)
1821                         ifmr->ifm_active |= IFM_FDX;
1822                 else
1823                         ifmr->ifm_active |= IFM_HDX;
1824         }
1825         EM_CORE_UNLOCK(adapter);
1826 }
1827
1828 /*********************************************************************
1829  *
1830  *  Media Ioctl callback
1831  *
1832  *  This routine is called when the user changes speed/duplex using
1833  *  media/mediaopt option with ifconfig.
1834  *
1835  **********************************************************************/
1836 static int
1837 em_media_change(if_t ifp)
1838 {
1839         struct adapter *adapter = if_getsoftc(ifp);
1840         struct ifmedia  *ifm = &adapter->media;
1841
1842         INIT_DEBUGOUT("em_media_change: begin");
1843
1844         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1845                 return (EINVAL);
1846
1847         EM_CORE_LOCK(adapter);
1848         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1849         case IFM_AUTO:
1850                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1851                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1852                 break;
1853         case IFM_1000_LX:
1854         case IFM_1000_SX:
1855         case IFM_1000_T:
1856                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1857                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1858                 break;
1859         case IFM_100_TX:
1860                 adapter->hw.mac.autoneg = FALSE;
1861                 adapter->hw.phy.autoneg_advertised = 0;
1862                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1863                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1864                 else
1865                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1866                 break;
1867         case IFM_10_T:
1868                 adapter->hw.mac.autoneg = FALSE;
1869                 adapter->hw.phy.autoneg_advertised = 0;
1870                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1871                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1872                 else
1873                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1874                 break;
1875         default:
1876                 device_printf(adapter->dev, "Unsupported media type\n");
1877         }
1878
1879         em_init_locked(adapter);
1880         EM_CORE_UNLOCK(adapter);
1881
1882         return (0);
1883 }
1884
1885 /*********************************************************************
1886  *
1887  *  This routine maps the mbufs to tx descriptors.
1888  *
1889  *  return 0 on success, positive on failure
1890  **********************************************************************/
1891
1892 static int
1893 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1894 {
1895         struct adapter          *adapter = txr->adapter;
1896         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1897         bus_dmamap_t            map;
1898         struct em_txbuffer      *tx_buffer, *tx_buffer_mapped;
1899         struct e1000_tx_desc    *ctxd = NULL;
1900         struct mbuf             *m_head;
1901         struct ether_header     *eh;
1902         struct ip               *ip = NULL;
1903         struct tcphdr           *tp = NULL;
1904         u32                     txd_upper = 0, txd_lower = 0;
1905         int                     ip_off, poff;
1906         int                     nsegs, i, j, first, last = 0;
1907         int                     error;
1908         bool                    do_tso, tso_desc, remap = TRUE;
1909
1910         m_head = *m_headp;
1911         do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1912         tso_desc = FALSE;
1913         ip_off = poff = 0;
1914
1915         /*
1916          * Intel recommends entire IP/TCP header length reside in a single
1917          * buffer. If multiple descriptors are used to describe the IP and
1918          * TCP header, each descriptor should describe one or more
1919          * complete headers; descriptors referencing only parts of headers
1920          * are not supported. If all layer headers are not coalesced into
1921          * a single buffer, each buffer should not cross a 4KB boundary,
1922          * or be larger than the maximum read request size.
1923          * The controller also requires modifying the IP/TCP header to
1924          * make TSO work, so we first obtain a writable mbuf chain and
1925          * then coalesce the ethernet/IP/TCP headers into a single buffer
1926          * to meet the controller's requirement. This also simplifies
1927          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1928          */
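        /*
         * The pullup sequence below linearizes the ethernet, IP and
         * TCP/UDP headers into the first mbuf; the ip/tp pointers it
         * computes are later handed to em_tso_setup() and
         * em_transmit_checksum_setup().
         */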
1929         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1930                 if (do_tso || (m_head->m_next != NULL && 
1931                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1932                         if (M_WRITABLE(*m_headp) == 0) {
1933                                 m_head = m_dup(*m_headp, M_NOWAIT);
1934                                 m_freem(*m_headp);
1935                                 if (m_head == NULL) {
1936                                         *m_headp = NULL;
1937                                         return (ENOBUFS);
1938                                 }
1939                                 *m_headp = m_head;
1940                         }
1941                 }
1942                 /*
1943                  * XXX
1944                  * Assume IPv4, we don't have TSO/checksum offload support
1945                  * for IPv6 yet.
1946                  */
1947                 ip_off = sizeof(struct ether_header);
1948                 if (m_head->m_len < ip_off) {
1949                         m_head = m_pullup(m_head, ip_off);
1950                         if (m_head == NULL) {
1951                                 *m_headp = NULL;
1952                                 return (ENOBUFS);
1953                         }
1954                 }
1955                 eh = mtod(m_head, struct ether_header *);
1956                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1957                         ip_off = sizeof(struct ether_vlan_header);
1958                         if (m_head->m_len < ip_off) {
1959                                 m_head = m_pullup(m_head, ip_off);
1960                                 if (m_head == NULL) {
1961                                         *m_headp = NULL;
1962                                         return (ENOBUFS);
1963                                 }
1964                         }
1965                 }
1966                 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1967                         m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1968                         if (m_head == NULL) {
1969                                 *m_headp = NULL;
1970                                 return (ENOBUFS);
1971                         }
1972                 }
1973                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1974                 poff = ip_off + (ip->ip_hl << 2);
1975
1976                 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1977                         if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1978                                 m_head = m_pullup(m_head, poff +
1979                                     sizeof(struct tcphdr));
1980                                 if (m_head == NULL) {
1981                                         *m_headp = NULL;
1982                                         return (ENOBUFS);
1983                                 }
1984                         }
1985                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1986                         /*
1987                          * TSO workaround: pull the full TCP header plus
1988                          *   TSO_WORKAROUND (4) extra bytes into the first mbuf.
1989                          */
1990                         if (m_head->m_len < poff + (tp->th_off << 2)) {
1991                                 m_head = m_pullup(m_head, poff +
1992                                                  (tp->th_off << 2) +
1993                                                  TSO_WORKAROUND);
1994                                 if (m_head == NULL) {
1995                                         *m_headp = NULL;
1996                                         return (ENOBUFS);
1997                                 }
1998                         }
1999                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2000                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2001                         if (do_tso) {
2002                                 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2003                                                   (ip->ip_hl << 2) +
2004                                                   (tp->th_off << 2));
2005                                 ip->ip_sum = 0;
2006                                 /*
2007                                  * The TCP pseudo-header checksum set up
2008                                  * here must not include the TCP payload
2009                                  * length, so recompute it as the hardware
2010                                  * expects, in adherence to Microsoft's
2011                                  * Large Send specification.
2012                                  */
2013                                 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2014                                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2015                         }
2016                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2017                         if (m_head->m_len < poff + sizeof(struct udphdr)) {
2018                                 m_head = m_pullup(m_head, poff +
2019                                     sizeof(struct udphdr));
2020                                 if (m_head == NULL) {
2021                                         *m_headp = NULL;
2022                                         return (ENOBUFS);
2023                                 }
2024                         }
2025                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2026                 }
2027                 *m_headp = m_head;
2028         }
2029
2030         /*
2031          * Map the packet for DMA
2032          *
2033          * Capture the first descriptor index,
2034          * Capture the first descriptor index;
2035          * this descriptor will store the index
2036          * of the EOP, which is the only one that
2037          * gets a DONE-bit writeback.
2038         first = txr->next_avail_desc;
2039         tx_buffer = &txr->tx_buffers[first];
2040         tx_buffer_mapped = tx_buffer;
2041         map = tx_buffer->map;
2042
2043 retry:
2044         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2045             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2046
2047         /*
2048          * There are two types of errors we can (try) to handle:
2049          * - EFBIG means the mbuf chain was too long and bus_dma ran
2050          *   out of segments.  Defragment the mbuf chain and try again.
2051          * - ENOMEM means bus_dma could not obtain enough bounce buffers
2052          *   at this point in time.  Defer sending and try again later.
2053          * All other errors, in particular EINVAL, are fatal and prevent the
2054          * mbuf chain from ever going through.  Drop it and report error.
2055          */
2056         if (error == EFBIG && remap) {
2057                 struct mbuf *m;
2058
2059                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2060                 if (m == NULL) {
2061                         adapter->mbuf_defrag_failed++;
2062                         m_freem(*m_headp);
2063                         *m_headp = NULL;
2064                         return (ENOBUFS);
2065                 }
2066                 *m_headp = m;
2067
2068                 /* Try it again, but only once */
2069                 remap = FALSE;
2070                 goto retry;
2071         } else if (error != 0) {
2072                 adapter->no_tx_dma_setup++;
2073                 m_freem(*m_headp);
2074                 *m_headp = NULL;
2075                 return (error);
2076         }
2077
2078         /*
2079          * TSO hardware workaround: if this packet is not
2080          * TSO, is only a single descriptor long, and it
2081          * follows a TSO burst, we must add a sentinel
2082          * descriptor to prevent premature writeback.
2083          */
2084         if ((!do_tso) && (txr->tx_tso == TRUE)) {
2085                 if (nsegs == 1)
2086                         tso_desc = TRUE;
2087                 txr->tx_tso = FALSE;
2088         }
2089
2090         if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2091                 txr->no_desc_avail++;
2092                 bus_dmamap_unload(txr->txtag, map);
2093                 return (ENOBUFS);
2094         }
2095         m_head = *m_headp;
2096
2097         /* Do hardware assists */
2098         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2099                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2100                     &txd_upper, &txd_lower);
2101                 /* we need to make a final sentinel transmit desc */
2102                 tso_desc = TRUE;
2103         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2104                 em_transmit_checksum_setup(txr, m_head,
2105                     ip_off, ip, &txd_upper, &txd_lower);
2106
2107         if (m_head->m_flags & M_VLANTAG) {
2108                 /* Set the vlan id. */
2109                 txd_upper |= htole16(if_getvtag(m_head)) << 16;
2110                 /* Tell hardware to add tag */
2111                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2112         }
2113
2114         i = txr->next_avail_desc;
2115
2116         /* Set up our transmit descriptors */
2117         for (j = 0; j < nsegs; j++) {
2118                 bus_size_t seg_len;
2119                 bus_addr_t seg_addr;
2120
2121                 tx_buffer = &txr->tx_buffers[i];
2122                 ctxd = &txr->tx_base[i];
2123                 seg_addr = segs[j].ds_addr;
2124                 seg_len  = segs[j].ds_len;
2125                 /*
2126                 ** TSO Workaround:
2127                 ** If this is the last descriptor, we want to
2128                 ** split it so we have a small final sentinel
2129                 */
2130                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2131                         seg_len -= TSO_WORKAROUND;
2132                         ctxd->buffer_addr = htole64(seg_addr);
2133                         ctxd->lower.data = htole32(
2134                                 adapter->txd_cmd | txd_lower | seg_len);
2135                         ctxd->upper.data = htole32(txd_upper);
2136                         if (++i == adapter->num_tx_desc)
2137                                 i = 0;
2138
2139                         /* Now make the sentinel */     
2140                         txr->tx_avail--;
2141                         ctxd = &txr->tx_base[i];
2142                         tx_buffer = &txr->tx_buffers[i];
2143                         ctxd->buffer_addr =
2144                             htole64(seg_addr + seg_len);
2145                         ctxd->lower.data = htole32(
2146                             adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2147                         ctxd->upper.data =
2148                             htole32(txd_upper);
2149                         last = i;
2150                         if (++i == adapter->num_tx_desc)
2151                                 i = 0;
2152                 } else {
2153                         ctxd->buffer_addr = htole64(seg_addr);
2154                         ctxd->lower.data = htole32(
2155                             adapter->txd_cmd | txd_lower | seg_len);
2156                         ctxd->upper.data = htole32(txd_upper);
2157                         last = i;
2158                         if (++i == adapter->num_tx_desc)
2159                                 i = 0;
2160                 }
2161                 tx_buffer->m_head = NULL;
2162                 tx_buffer->next_eop = -1;
2163         }
2164
2165         txr->next_avail_desc = i;
2166         txr->tx_avail -= nsegs;
2167
2168         tx_buffer->m_head = m_head;
2169         /*
2170         ** Here we swap the maps so the last descriptor,
2171         ** which gets the completion interrupt, has the
2172         ** real map, and the first descriptor gets the
2173         ** unused map from this descriptor.
2174         */
2175         tx_buffer_mapped->map = tx_buffer->map;
2176         tx_buffer->map = map;
2177         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2178
2179         /*
2180          * Last Descriptor of Packet
2181          * needs End Of Packet (EOP)
2182          * and Report Status (RS)
2183          */
2184         ctxd->lower.data |=
2185             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2186         /*
2187          * Keep track in the first buffer which
2188          * descriptor will be written back
2189          */
2190         tx_buffer = &txr->tx_buffers[first];
2191         tx_buffer->next_eop = last;
2192
2193         /*
2194          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2195          * that this frame is available to transmit.
2196          */
2197         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2198             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2199         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2200
2201         return (0);
2202 }
2203
2204 static void
2205 em_set_promisc(struct adapter *adapter)
2206 {
2207         if_t ifp = adapter->ifp;
2208         u32             reg_rctl;
2209
2210         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2211
2212         if (if_getflags(ifp) & IFF_PROMISC) {
2213                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2214                 /* Turn this on if you want to see bad packets */
2215                 if (em_debug_sbp)
2216                         reg_rctl |= E1000_RCTL_SBP;
2217                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2218         } else if (if_getflags(ifp) & IFF_ALLMULTI) {
2219                 reg_rctl |= E1000_RCTL_MPE;
2220                 reg_rctl &= ~E1000_RCTL_UPE;
2221                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2222         }
2223 }
2224
2225 static void
2226 em_disable_promisc(struct adapter *adapter)
2227 {
2228         if_t            ifp = adapter->ifp;
2229         u32             reg_rctl;
2230         int             mcnt = 0;
2231
2232         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2233         reg_rctl &= (~E1000_RCTL_UPE);
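        /*
         * Keep E1000_RCTL_MPE set while IFF_ALLMULTI is on or the
         * multicast list overflows the hardware table, since
         * em_set_multi() relies on MPE in the overflow case.
         */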
2234         if (if_getflags(ifp) & IFF_ALLMULTI)
2235                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2236         else
2237                 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2238         /* Don't disable if in MAX groups */
2239         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2240                 reg_rctl &= (~E1000_RCTL_MPE);
2241         reg_rctl &= (~E1000_RCTL_SBP);
2242         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2243 }
2244
2245
2246 /*********************************************************************
2247  *  Multicast Update
2248  *
2249  *  This routine is called whenever multicast address list is updated.
2250  *
2251  **********************************************************************/
2252
2253 static void
2254 em_set_multi(struct adapter *adapter)
2255 {
2256         if_t ifp = adapter->ifp;
2257         u32 reg_rctl = 0;
2258         u8  *mta; /* Multicast array memory */
2259         int mcnt = 0;
2260
2261         IOCTL_DEBUGOUT("em_set_multi: begin");
2262
2263         mta = adapter->mta;
2264         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
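        /*
         * Workaround for the 82542 rev 2.0: hold the receiver in
         * reset (RCTL_RST), with MWI disabled, while the multicast
         * table is rewritten; both are restored below.
         */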
2265
2266         if (adapter->hw.mac.type == e1000_82542 && 
2267             adapter->hw.revision_id == E1000_REVISION_2) {
2268                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2269                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2270                         e1000_pci_clear_mwi(&adapter->hw);
2271                 reg_rctl |= E1000_RCTL_RST;
2272                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2273                 msec_delay(5);
2274         }
2275
2276         if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2277
2278         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2279                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2280                 reg_rctl |= E1000_RCTL_MPE;
2281                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2282         } else
2283                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2284
2285         if (adapter->hw.mac.type == e1000_82542 && 
2286             adapter->hw.revision_id == E1000_REVISION_2) {
2287                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2288                 reg_rctl &= ~E1000_RCTL_RST;
2289                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2290                 msec_delay(5);
2291                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2292                         e1000_pci_set_mwi(&adapter->hw);
2293         }
2294 }
2295
2296
2297 /*********************************************************************
2298  *  Timer routine
2299  *
2300  *  This routine checks for link status and updates statistics.
2301  *
2302  **********************************************************************/
2303
2304 static void
2305 em_local_timer(void *arg)
2306 {
2307         struct adapter  *adapter = arg;
2308         if_t ifp = adapter->ifp;
2309         struct tx_ring  *txr = adapter->tx_rings;
2310         struct rx_ring  *rxr = adapter->rx_rings;
2311         u32             trigger = 0;
2312
2313         EM_CORE_LOCK_ASSERT(adapter);
2314
2315         em_update_link_status(adapter);
2316         em_update_stats_counters(adapter);
2317
2318         /* Reset LAA into RAR[0] on 82571 */
2319         if ((adapter->hw.mac.type == e1000_82571) &&
2320             e1000_get_laa_state_82571(&adapter->hw))
2321                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2322
2323         /* Mask to use in the irq trigger */
2324         if (adapter->msix_mem) {
2325                 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2326                         trigger |= rxr->ims;
2327                 rxr = adapter->rx_rings;
2328         } else
2329                 trigger = E1000_ICS_RXDMT0;
2330
2331         /*
2332         ** Check on the state of the TX queue(s); this
2333         ** can be done without the lock because it is RO
2334         ** and the HUNG state will be static once set.
2335         */
2336         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2337                 if (txr->busy == EM_TX_HUNG)
2338                         goto hung;
2339                 if (txr->busy >= EM_TX_MAXTRIES)
2340                         txr->busy = EM_TX_HUNG;
2341                 /* Schedule a TX task if needed */
2342                 if (txr->tx_avail <= EM_MAX_SCATTER)
2343                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2344         }
2345         
2346         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2347 #ifndef DEVICE_POLLING
2348         /* Trigger an RX interrupt to guarantee mbuf refresh */
2349         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2350 #endif
2351         return;
2352 hung:
2353         /* Looks like we're hung */
2354         device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2355                         txr->me);
2356         em_print_debug_info(adapter);
2357         if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2358         adapter->watchdog_events++;
2359         em_init_locked(adapter);
2360 }
2361
2362
2363 static void
2364 em_update_link_status(struct adapter *adapter)
2365 {
2366         struct e1000_hw *hw = &adapter->hw;
2367         if_t ifp = adapter->ifp;
2368         device_t dev = adapter->dev;
2369         struct tx_ring *txr = adapter->tx_rings;
2370         u32 link_check = 0;
2371
2372         /* Get the cached link value or read phy for real */
2373         switch (hw->phy.media_type) {
2374         case e1000_media_type_copper:
2375                 if (hw->mac.get_link_status) {
2376                         if (hw->mac.type == e1000_pch_spt)
2377                                 msec_delay(50);
2378                         /* Do the work to read phy */
2379                         e1000_check_for_link(hw);
2380                         link_check = !hw->mac.get_link_status;
2381                         if (link_check) /* ESB2 fix */
2382                                 e1000_cfg_on_link_up(hw);
2383                 } else
2384                         link_check = TRUE;
2385                 break;
2386         case e1000_media_type_fiber:
2387                 e1000_check_for_link(hw);
2388                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2389                                  E1000_STATUS_LU);
2390                 break;
2391         case e1000_media_type_internal_serdes:
2392                 e1000_check_for_link(hw);
2393                 link_check = adapter->hw.mac.serdes_has_link;
2394                 break;
2395         default:
2396         case e1000_media_type_unknown:
2397                 break;
2398         }
2399
2400         /* Now check for a transition */
2401         if (link_check && (adapter->link_active == 0)) {
2402                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2403                     &adapter->link_duplex);
2404                 /* 
2405                 ** There have proven to be problems with TSO when not
2406                 ** at full gigabit speed, so disable the assist automatically
2407                 ** when at lower speeds.  -jfv
2408                 */
2409                 if (adapter->link_speed != SPEED_1000) {
2410                         if_sethwassistbits(ifp, 0, CSUM_TSO);
2411                         if_setcapenablebit(ifp, 0, IFCAP_TSO4);
2412                         if_setcapabilitiesbit(ifp, 0, IFCAP_TSO4);
2413
2414                 }
2415
2416                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2417                 if ((adapter->link_speed != SPEED_1000) &&
2418                     ((hw->mac.type == e1000_82571) ||
2419                     (hw->mac.type == e1000_82572))) {
2420                         int tarc0;
2421                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2422                         tarc0 &= ~TARC_SPEED_MODE_BIT;
2423                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2424                 }
2425                 if (bootverbose)
2426                         device_printf(dev, "Link is up %d Mbps %s\n",
2427                             adapter->link_speed,
2428                             ((adapter->link_duplex == FULL_DUPLEX) ?
2429                             "Full Duplex" : "Half Duplex"));
2430                 adapter->link_active = 1;
2431                 adapter->smartspeed = 0;
2432                 if_setbaudrate(ifp, adapter->link_speed * 1000000);
2433                 if_link_state_change(ifp, LINK_STATE_UP);
2434         } else if (!link_check && (adapter->link_active == 1)) {
2435                 if_setbaudrate(ifp, 0);
2436                 adapter->link_speed = 0;
2437                 adapter->link_duplex = 0;
2438                 if (bootverbose)
2439                         device_printf(dev, "Link is Down\n");
2440                 adapter->link_active = 0;
2441                 /* Link down, disable hang detection */
2442                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2443                         txr->busy = EM_TX_IDLE;
2444                 if_link_state_change(ifp, LINK_STATE_DOWN);
2445         }
2446 }
2447
2448 /*********************************************************************
2449  *
2450  *  This routine disables all traffic on the adapter by issuing a
2451  *  global reset on the MAC.
2452  *
2453  *  This routine should always be called with the CORE lock held;
2454  *  it takes each TX lock itself while disarming hang detection.
2455  **********************************************************************/
2456
2457 static void
2458 em_stop(void *arg)
2459 {
2460         struct adapter  *adapter = arg;
2461         if_t ifp = adapter->ifp;
2462         struct tx_ring  *txr = adapter->tx_rings;
2463
2464         EM_CORE_LOCK_ASSERT(adapter);
2465
2466         INIT_DEBUGOUT("em_stop: begin");
2467
2468         em_disable_intr(adapter);
2469         callout_stop(&adapter->timer);
2470
2471         /* Tell the stack that the interface is no longer active */
2472         if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2473
2474         /* Disarm Hang Detection. */
2475         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2476                 EM_TX_LOCK(txr);
2477                 txr->busy = EM_TX_IDLE;
2478                 EM_TX_UNLOCK(txr);
2479         }
2480
2481         /* I219 needs some special flushing to avoid hangs */
2482         if (adapter->hw.mac.type == e1000_pch_spt)
2483                 em_flush_desc_rings(adapter);
2484
2485         e1000_reset_hw(&adapter->hw);
2486         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2487
2488         e1000_led_off(&adapter->hw);
2489         e1000_cleanup_led(&adapter->hw);
2490 }
2491
2492
2493 /*********************************************************************
2494  *
2495  *  Determine hardware revision.
2496  *
2497  **********************************************************************/
2498 static void
2499 em_identify_hardware(struct adapter *adapter)
2500 {
2501         device_t dev = adapter->dev;
2502
2503         /* Make sure bus mastering is enabled in our PCI config space */
2504         pci_enable_busmaster(dev);
2505         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2506
2507         /* Save off the information about this board */
2508         adapter->hw.vendor_id = pci_get_vendor(dev);
2509         adapter->hw.device_id = pci_get_device(dev);
2510         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2511         adapter->hw.subsystem_vendor_id =
2512             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2513         adapter->hw.subsystem_device_id =
2514             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2515
2516         /* Do Shared Code Init and Setup */
2517         if (e1000_set_mac_type(&adapter->hw)) {
2518                 device_printf(dev, "Setup init failure\n");
2519                 return;
2520         }
2521 }
2522
2523 static int
2524 em_allocate_pci_resources(struct adapter *adapter)
2525 {
2526         device_t        dev = adapter->dev;
2527         int             rid;
2528
2529         rid = PCIR_BAR(0);
2530         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2531             &rid, RF_ACTIVE);
2532         if (adapter->memory == NULL) {
2533                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2534                 return (ENXIO);
2535         }
2536         adapter->osdep.mem_bus_space_tag =
2537             rman_get_bustag(adapter->memory);
2538         adapter->osdep.mem_bus_space_handle =
2539             rman_get_bushandle(adapter->memory);
2540         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
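        /*
         * Note: hw_addr is not a directly usable KVA pointer; the
         * shared-code register macros perform accesses through the
         * bus-space tag and handle saved in the osdep above.
         */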
2541
2542         adapter->hw.back = &adapter->osdep;
2543
2544         return (0);
2545 }
2546
2547 /*********************************************************************
2548  *
2549  *  Setup the Legacy or MSI Interrupt handler
2550  *
2551  **********************************************************************/
2552 int
2553 em_allocate_legacy(struct adapter *adapter)
2554 {
2555         device_t dev = adapter->dev;
2556         struct tx_ring  *txr = adapter->tx_rings;
2557         int error, rid = 0;
2558
2559         /* Manually turn off all interrupts */
2560         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2561
2562         if (adapter->msix == 1) /* using MSI */
2563                 rid = 1;
2564         /* We allocate a single interrupt resource */
2565         adapter->res = bus_alloc_resource_any(dev,
2566             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2567         if (adapter->res == NULL) {
2568                 device_printf(dev, "Unable to allocate bus resource: "
2569                     "interrupt\n");
2570                 return (ENXIO);
2571         }
2572
2573         /*
2574          * Allocate a fast interrupt and the associated
2575          * deferred processing contexts.
2576          */
2577         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2578         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2579             taskqueue_thread_enqueue, &adapter->tq);
2580         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2581             device_get_nameunit(adapter->dev));
2582         /* Use a TX-only task, scheduled from the local timer */
2583         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2584         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2585             taskqueue_thread_enqueue, &txr->tq);
2586         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2587             device_get_nameunit(adapter->dev));
2588         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2589         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2590             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2591                 device_printf(dev, "Failed to register fast interrupt "
2592                             "handler: %d\n", error);
2593                 taskqueue_free(adapter->tq);
2594                 adapter->tq = NULL;
2595                 return (error);
2596         }
2597         
2598         return (0);
2599 }
2600
2601 /*********************************************************************
2602  *
2603  *  Setup the MSIX Interrupt handlers
2604  *   This is not really multiqueue; rather,
2605  *   it is just separate interrupt vectors
2606  *   for TX, RX, and Link.
2607  *
2608  **********************************************************************/
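/*
 * With the default single queue this assigns vector 0 (rid 1) to RX,
 * vector 1 (rid 2) to TX, and vector 2 (rid 3) to the link interrupt.
 */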
2609 int
2610 em_allocate_msix(struct adapter *adapter)
2611 {
2612         device_t        dev = adapter->dev;
2613         struct          tx_ring *txr = adapter->tx_rings;
2614         struct          rx_ring *rxr = adapter->rx_rings;
2615         int             error, rid, vector = 0;
2616         int             cpu_id = 0;
2617
2618
2619         /* Make sure all interrupts are disabled */
2620         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2621
2622         /* First set up ring resources */
2623         for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2624
2625                 /* RX ring */
2626                 rid = vector + 1;
2627
2628                 rxr->res = bus_alloc_resource_any(dev,
2629                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2630                 if (rxr->res == NULL) {
2631                         device_printf(dev,
2632                             "Unable to allocate bus resource: "
2633                             "RX MSIX Interrupt %d\n", i);
2634                         return (ENXIO);
2635                 }
2636                 if ((error = bus_setup_intr(dev, rxr->res,
2637                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2638                     rxr, &rxr->tag)) != 0) {
2639                         device_printf(dev, "Failed to register RX handler");
2640                         return (error);
2641                 }
2642 #if __FreeBSD_version >= 800504
2643                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2644 #endif
2645                 rxr->msix = vector;
2646
2647                 if (em_last_bind_cpu < 0)
2648                         em_last_bind_cpu = CPU_FIRST();
2649                 cpu_id = em_last_bind_cpu;
2650                 bus_bind_intr(dev, rxr->res, cpu_id);
2651
2652                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2653                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2654                     taskqueue_thread_enqueue, &rxr->tq);
2655                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2656                     device_get_nameunit(adapter->dev), cpu_id);
2657                 /*
2658                 ** Set the bit to enable interrupt
2659                 ** in E1000_IMS -- bits 20 and 21
2660                 ** are for RX0 and RX1; note this has
2661                 ** NOTHING to do with the MSIX vector
2662                 */
2663                 rxr->ims = 1 << (20 + i);
2664                 adapter->ims |= rxr->ims;
2665                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2666
2667                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2668         }
2669
2670         for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2671                 /* TX ring */
2672                 rid = vector + 1;
2673                 txr->res = bus_alloc_resource_any(dev,
2674                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2675                 if (txr->res == NULL) {
2676                         device_printf(dev,
2677                             "Unable to allocate bus resource: "
2678                             "TX MSIX Interrupt %d\n", i);
2679                         return (ENXIO);
2680                 }
2681                 if ((error = bus_setup_intr(dev, txr->res,
2682                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2683                     txr, &txr->tag)) != 0) {
2684                         device_printf(dev, "Failed to register TX handler");
2685                         return (error);
2686                 }
2687 #if __FreeBSD_version >= 800504
2688                 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2689 #endif
2690                 txr->msix = vector;
2691
2692                 if (em_last_bind_cpu < 0)
2693                         em_last_bind_cpu = CPU_FIRST();
2694                 cpu_id = em_last_bind_cpu;
2695                 bus_bind_intr(dev, txr->res, cpu_id);
2696
2697                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2698                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2699                     taskqueue_thread_enqueue, &txr->tq);
2700                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2701                     device_get_nameunit(adapter->dev), cpu_id);
2702                 /*
2703                 ** Set the bit to enable interrupt
2704                 ** in E1000_IMS -- bits 22 and 23
2705                 ** are for TX0 and TX1; note this has
2706                 ** NOTHING to do with the MSIX vector
2707                 */
2708                 txr->ims = 1 << (22 + i);
2709                 adapter->ims |= txr->ims;
2710                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2711
2712                 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2713         }
2714
2715         /* Link interrupt */
2716         rid = vector + 1;
2717         adapter->res = bus_alloc_resource_any(dev,
2718             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2719         if (!adapter->res) {
2720                 device_printf(dev, "Unable to allocate "
2721                     "bus resource: Link interrupt [%d]\n", rid);
2722                 return (ENXIO);
2723         }
2724         /* Set the link handler function */
2725         error = bus_setup_intr(dev, adapter->res,
2726             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2727             em_msix_link, adapter, &adapter->tag);
2728         if (error) {
2729                 adapter->res = NULL;
2730                 device_printf(dev, "Failed to register LINK handler\n");
2731                 return (error);
2732         }
2733 #if __FreeBSD_version >= 800504
2734         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2735 #endif
2736         adapter->linkvec = vector;
2737         adapter->ivars |= (8 | vector) << 16;
2738         adapter->ivars |= 0x80000000;
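        /*
        ** Illustrative note (not from the original source): assuming the
        ** 82574 IVAR layout, where each 4-bit nibble selects the MSI-X
        ** vector for one interrupt cause and bit 3 of the nibble marks
        ** the entry valid, a single-queue setup with rx = 0, tx = 1,
        ** link = 2 yields ivars = (8|0) | ((8|1) << 8) | ((8|2) << 16) |
        ** 0x80000000 = 0x800A0908.
        */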
2739
2740         return (0);
2741 }
2742
2743
2744 static void
2745 em_free_pci_resources(struct adapter *adapter)
2746 {
2747         device_t        dev = adapter->dev;
2748         struct tx_ring  *txr;
2749         struct rx_ring  *rxr;
2750         int             rid;
2751
2752
2753         /*
2754         ** Release all the queue interrupt resources:
2755         */
2756         for (int i = 0; i < adapter->num_queues; i++) {
2757                 txr = &adapter->tx_rings[i];
2758                 /* an early abort? */
2759                 if (txr == NULL)
2760                         break;
2761                 rid = txr->msix + 1;
2762                 if (txr->tag != NULL) {
2763                         bus_teardown_intr(dev, txr->res, txr->tag);
2764                         txr->tag = NULL;
2765                 }
2766                 if (txr->res != NULL)
2767                         bus_release_resource(dev, SYS_RES_IRQ,
2768                             rid, txr->res);
2769
2770                 rxr = &adapter->rx_rings[i];
2771                 /* an early abort? */
2772                 if (rxr == NULL)
2773                         break;
2774                 rid = rxr->msix + 1;
2775                 if (rxr->tag != NULL) {
2776                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2777                         rxr->tag = NULL;
2778                 }
2779                 if (rxr->res != NULL)
2780                         bus_release_resource(dev, SYS_RES_IRQ,
2781                             rid, rxr->res);
2782         }
2783
2784         if (adapter->linkvec) /* we are doing MSIX */
2785                 rid = adapter->linkvec + 1;
2786         else
2787                 rid = (adapter->msix != 0) ? 1 : 0;
2788
2789         if (adapter->tag != NULL) {
2790                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2791                 adapter->tag = NULL;
2792         }
2793
2794         if (adapter->res != NULL)
2795                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2796
2797
2798         if (adapter->msix)
2799                 pci_release_msi(dev);
2800
2801         if (adapter->msix_mem != NULL)
2802                 bus_release_resource(dev, SYS_RES_MEMORY,
2803                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2804
2805         if (adapter->memory != NULL)
2806                 bus_release_resource(dev, SYS_RES_MEMORY,
2807                     PCIR_BAR(0), adapter->memory);
2808
2809         if (adapter->flash != NULL)
2810                 bus_release_resource(dev, SYS_RES_MEMORY,
2811                     EM_FLASH, adapter->flash);
2812 }
2813
2814 /*
2815  * Setup MSI or MSI/X
2816  */
2817 static int
2818 em_setup_msix(struct adapter *adapter)
2819 {
2820         device_t dev = adapter->dev;
2821         int val;
2822
2823         /* Nearly always going to use one queue */
2824         adapter->num_queues = 1;
2825
2826         /*
2827         ** Try using MSI-X for Hartwell adapters
2828         */
2829         if ((adapter->hw.mac.type == e1000_82574) &&
2830             (em_enable_msix == TRUE)) {
2831 #ifdef EM_MULTIQUEUE
2832                 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2833                 if (adapter->num_queues > 1)
2834                         em_enable_vectors_82574(adapter);
2835 #endif
2836                 /* Map the MSIX BAR */
2837                 int rid = PCIR_BAR(EM_MSIX_BAR);
2838                 adapter->msix_mem = bus_alloc_resource_any(dev,
2839                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2840                 if (adapter->msix_mem == NULL) {
2841                         /* May not be enabled */
2842                         device_printf(adapter->dev,
2843                             "Unable to map MSIX table\n");
2844                         goto msi;
2845                 }
2846                 val = pci_msix_count(dev); 
2847
2848 #ifdef EM_MULTIQUEUE
2849                 /* We need 5 vectors in the multiqueue case: 2 RX + 2 TX + link */
2850                 if (adapter->num_queues > 1) {
2851                         if (val >= 5)
2852                                 val = 5;
2853                         else {
2854                                 adapter->num_queues = 1;
2855                                 device_printf(adapter->dev,
2856                                     "Insufficient MSIX vectors for >1 queue, "
2857                                     "using single queue...\n");
2858                                 goto msix_one;
2859                         }
2860                 } else {
2861 msix_one:
2862 #endif
2863                         if (val >= 3)
2864                                 val = 3;
2865                         else {
2866                                 device_printf(adapter->dev,
2867                                 "Insufficient MSIX vectors, using MSI\n");
2868                                 goto msi;
2869                         }
2870 #ifdef EM_MULTIQUEUE
2871                 }
2872 #endif
2873
2874                 if (pci_alloc_msix(dev, &val) == 0) {
2875                         device_printf(adapter->dev,
2876                             "Using MSIX interrupts "
2877                             "with %d vectors\n", val);
2878                         return (val);
2879                 }
2880
2881                 /*
2882                 ** If MSIX alloc failed or provided us with
2883                 ** less than needed, free and fall through to MSI
2884                 */
2885                 pci_release_msi(dev);
2886         }
2887 msi:
2888         if (adapter->msix_mem != NULL) {
2889                 bus_release_resource(dev, SYS_RES_MEMORY,
2890                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2891                 adapter->msix_mem = NULL;
2892         }
2893         val = 1;
2894         if (pci_alloc_msi(dev, &val) == 0) {
2895                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2896                 return (val);
2897         } 
2898         /* Should only happen due to manual configuration */
2899         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2900         return (0);
2901 }
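
/*
** Editorial summary: em_setup_msix() degrades gracefully -- MSI-X with
** 5 vectors (2 RX + 2 TX + link) or 3 vectors (RX + TX + link) on the
** 82574, then a single MSI vector, and finally legacy INTx (return
** value 0). The caller presumably selects the matching interrupt
** setup path from the returned vector count.
*/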
2902
2903
2904 /*
2905 ** The following three flush routines are used as a workaround for the
2906 ** I219 client parts, and only for them.
2907 **
2908 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2909 **
2910 ** We want to clear all pending descriptors from the TX ring.
2911 ** Zeroing happens when the HW reads the regs. We assign the ring itself as
2912 ** the data of the next descriptor. We don't care about the data since we
2913 ** are about to reset the HW.
2914 */
2915 static void
2916 em_flush_tx_ring(struct adapter *adapter)
2917 {
2918         struct e1000_hw         *hw = &adapter->hw;
2919         struct tx_ring          *txr = adapter->tx_rings;
2920         struct e1000_tx_desc    *txd;
2921         u32                     tctl, txd_lower = E1000_TXD_CMD_IFCS;
2922         u16                     size = 512;
2923
2924         tctl = E1000_READ_REG(hw, E1000_TCTL);
2925         E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2926
2927         txd = &txr->tx_base[txr->next_avail_desc++];
2928         if (txr->next_avail_desc == adapter->num_tx_desc)
2929                 txr->next_avail_desc = 0;
2930
2931         /* Just use the ring as a dummy buffer addr */
2932         txd->buffer_addr = txr->txdma.dma_paddr;
2933         txd->lower.data = htole32(txd_lower | size);
2934         txd->upper.data = 0;
2935
2936         /* flush descriptors to memory before notifying the HW */
2937         wmb();
2938
2939         E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2940         mb();
2941         usec_delay(250);
2942 }
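
/*
** Note (illustrative): the single dummy descriptor above points at the
** ring's own DMA area, so no mbuf is involved; bumping TDT makes the
** hardware fetch and complete it, draining whatever was pending ahead
** of the impending reset.
*/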
2943
2944 /*
2945 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2946 **
2947 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
2948 */
2949 static void
2950 em_flush_rx_ring(struct adapter *adapter)
2951 {
2952         struct e1000_hw *hw = &adapter->hw;
2953         u32             rctl, rxdctl;
2954
2955         rctl = E1000_READ_REG(hw, E1000_RCTL);
2956         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2957         E1000_WRITE_FLUSH(hw);
2958         usec_delay(150);
2959
2960         rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2961         /* zero the lower 14 bits (prefetch and host thresholds) */
2962         rxdctl &= 0xffffc000;
2963         /*
2964          * update thresholds: prefetch threshold to 31, host threshold to 1
2965          * and make sure the granularity is "descriptors" and not "cache lines"
2966          */
2967         rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2968         E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2969
2970         /* momentarily enable the RX ring for the changes to take effect */
2971         E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2972         E1000_WRITE_FLUSH(hw);
2973         usec_delay(150);
2974         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2975 }
2976
2977 /*
2978 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
2979 **
2980 ** In i219, the descriptor rings must be emptied before resetting the HW
2981 ** or before changing the device state to D3 during runtime (runtime PM).
2982 **
2983 ** Failure to do this will cause the HW to enter a unit hang state which can
2984 ** only be released by PCI reset on the device
2985 **
2986 */
2987 static void
2988 em_flush_desc_rings(struct adapter *adapter)
2989 {
2990         struct e1000_hw *hw = &adapter->hw;
2991         device_t        dev = adapter->dev;
2992         u16             hang_state;
2993         u32             fext_nvm11, tdlen;
2994  
2995         /* First, disable MULR fix in FEXTNVM11 */
2996         fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2997         fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2998         E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2999         
3000         /* do nothing if we're not in a faulty state, or if the queue is empty */
3001         tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3002         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3003         if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3004                 return;
3005         em_flush_tx_ring(adapter);
3006
3007         /* recheck, maybe the fault is caused by the rx ring */
3008         hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3009         if (hang_state & FLUSH_DESC_REQUIRED)
3010                 em_flush_rx_ring(adapter);
3011 }
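
/*
** Note: the FLUSH_DESC_REQUIRED bit in the PCICFG_DESC_RING_STATUS
** config word is what flags the faulty state; the TX ring is flushed
** first and the bit is then re-read to decide whether the RX ring
** needs flushing as well.
*/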
3012
3013
3014 /*********************************************************************
3015  *
3016  *  Initialize the hardware to a configuration
3017  *  as specified by the adapter structure.
3018  *
3019  **********************************************************************/
3020 static void
3021 em_reset(struct adapter *adapter)
3022 {
3023         device_t        dev = adapter->dev;
3024         if_t ifp = adapter->ifp;
3025         struct e1000_hw *hw = &adapter->hw;
3026         u16             rx_buffer_size;
3027         u32             pba;
3028
3029         INIT_DEBUGOUT("em_reset: begin");
3030
3031         /* Default smart power down to off on newer adapters. */
3032         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3033             hw->mac.type == e1000_82572)) {
3034                 u16 phy_tmp = 0;
3035
3036                 /* Speed up time to link by disabling smart power down. */
3037                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3038                 phy_tmp &= ~IGP02E1000_PM_SPD;
3039                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3040         }
3041
3042         /*
3043          * Packet Buffer Allocation (PBA)
3044          * Writing PBA sets the receive portion of the buffer;
3045          * the remainder is used for the transmit buffer.
3046          */
3047         switch (hw->mac.type) {
3048         /* Total Packet Buffer on these is 48K */
3049         case e1000_82571:
3050         case e1000_82572:
3051         case e1000_80003es2lan:
3052                         pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3053                 break;
3054         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3055                         pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3056                 break;
3057         case e1000_82574:
3058         case e1000_82583:
3059                         pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3060                 break;
3061         case e1000_ich8lan:
3062                 pba = E1000_PBA_8K;
3063                 break;
3064         case e1000_ich9lan:
3065         case e1000_ich10lan:
3066                 /* Boost Receive side for jumbo frames */
3067                 if (adapter->hw.mac.max_frame_size > 4096)
3068                         pba = E1000_PBA_14K;
3069                 else
3070                         pba = E1000_PBA_10K;
3071                 break;
3072         case e1000_pchlan:
3073         case e1000_pch2lan:
3074         case e1000_pch_lpt:
3075         case e1000_pch_spt:
3076                 pba = E1000_PBA_26K;
3077                 break;
3078         default:
3079                 if (adapter->hw.mac.max_frame_size > 8192)
3080                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3081                 else
3082                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3083         }
3084         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3085
3086         /*
3087          * These parameters control the automatic generation (Tx) and
3088          * response (Rx) to Ethernet PAUSE frames.
3089          * - High water mark should allow for at least two frames to be
3090          *   received after sending an XOFF.
3091          * - Low water mark works best when it is very near the high water mark.
3092          *   This allows the receiver to restart by sending XON when it has
3093          *   drained a bit. Here we use an arbitrary value of 1500 which will
3094          *   restart after one full frame is pulled from the buffer. There
3095          *   could be several smaller frames in the buffer and if so they will
3096          *   not trigger the XON until their total number reduces the buffer
3097          *   by 1500.
3098          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3099          */
3100         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3101         hw->fc.high_water = rx_buffer_size -
3102             roundup2(adapter->hw.mac.max_frame_size, 1024);
3103         hw->fc.low_water = hw->fc.high_water - 1500;
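        /*
        ** Worked example (illustrative, not from the original source):
        ** with PBA = 32 (KB of RX buffer) and a 1522-byte max frame,
        ** rx_buffer_size = 32 << 10 = 32768, high_water = 32768 -
        ** roundup2(1522, 1024) = 32768 - 2048 = 30720, and low_water =
        ** 30720 - 1500 = 29220.
        */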
3104
3105         if (adapter->fc) /* locally set flow control value? */
3106                 hw->fc.requested_mode = adapter->fc;
3107         else
3108                 hw->fc.requested_mode = e1000_fc_full;
3109
3110         if (hw->mac.type == e1000_80003es2lan)
3111                 hw->fc.pause_time = 0xFFFF;
3112         else
3113                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3114
3115         hw->fc.send_xon = TRUE;
3116
3117         /* Device specific overrides/settings */
3118         switch (hw->mac.type) {
3119         case e1000_pchlan:
3120                 /* Workaround: no TX flow ctrl for PCH */
3121                 hw->fc.requested_mode = e1000_fc_rx_pause;
3122                 hw->fc.pause_time = 0xFFFF; /* override */
3123                 if (if_getmtu(ifp) > ETHERMTU) {
3124                         hw->fc.high_water = 0x3500;
3125                         hw->fc.low_water = 0x1500;
3126                 } else {
3127                         hw->fc.high_water = 0x5000;
3128                         hw->fc.low_water = 0x3000;
3129                 }
3130                 hw->fc.refresh_time = 0x1000;
3131                 break;
3132         case e1000_pch2lan:
3133         case e1000_pch_lpt:
3134         case e1000_pch_spt:
3135                 hw->fc.high_water = 0x5C20;
3136                 hw->fc.low_water = 0x5048;
3137                 hw->fc.pause_time = 0x0650;
3138                 hw->fc.refresh_time = 0x0400;
3139                 /* Jumbos need adjusted PBA */
3140                 if (if_getmtu(ifp) > ETHERMTU)
3141                         E1000_WRITE_REG(hw, E1000_PBA, 12);
3142                 else
3143                         E1000_WRITE_REG(hw, E1000_PBA, 26);
3144                 break;
3145         case e1000_ich9lan:
3146         case e1000_ich10lan:
3147                 if (if_getmtu(ifp) > ETHERMTU) {
3148                         hw->fc.high_water = 0x2800;
3149                         hw->fc.low_water = hw->fc.high_water - 8;
3150                         break;
3151                 } 
3152                 /* else fall thru */
3153         default:
3154                 if (hw->mac.type == e1000_80003es2lan)
3155                         hw->fc.pause_time = 0xFFFF;
3156                 break;
3157         }
3158
3159         /* I219 needs some special flushing to avoid hangs */
3160         if (hw->mac.type == e1000_pch_spt)
3161                 em_flush_desc_rings(adapter);
3162
3163         /* Issue a global reset */
3164         e1000_reset_hw(hw);
3165         E1000_WRITE_REG(hw, E1000_WUC, 0);
3166         em_disable_aspm(adapter);
3167         /* and a re-init */
3168         if (e1000_init_hw(hw) < 0) {
3169                 device_printf(dev, "Hardware Initialization Failed\n");
3170                 return;
3171         }
3172
3173         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3174         e1000_get_phy_info(hw);
3175         e1000_check_for_link(hw);
3176         return;
3177 }
3178
3179 /*********************************************************************
3180  *
3181  *  Setup networking device structure and register an interface.
3182  *
3183  **********************************************************************/
3184 static int
3185 em_setup_interface(device_t dev, struct adapter *adapter)
3186 {
3187         if_t ifp;
3188
3189         INIT_DEBUGOUT("em_setup_interface: begin");
3190
3191         ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3192         if (ifp == NULL) {
3193                 device_printf(dev, "cannot allocate ifnet structure\n");
3194                 return (-1);
3195         }
3196         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3197         if_setdev(ifp, dev);
3198         if_setinitfn(ifp, em_init);
3199         if_setsoftc(ifp, adapter);
3200         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3201         if_setioctlfn(ifp, em_ioctl);
3202         if_setgetcounterfn(ifp, em_get_counter);
3203
3204         /* TSO parameters */
3205         ifp->if_hw_tsomax = IP_MAXPACKET;
3206         /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3207         ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3208         ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3209
3210 #ifdef EM_MULTIQUEUE
3211         /* Multiqueue stack interface */
3212         if_settransmitfn(ifp, em_mq_start);
3213         if_setqflushfn(ifp, em_qflush);
3214 #else
3215         if_setstartfn(ifp, em_start);
3216         if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3217         if_setsendqready(ifp);
3218 #endif  
3219
3220         ether_ifattach(ifp, adapter->hw.mac.addr);
3221
3222         if_setcapabilities(ifp, 0);
3223         if_setcapenable(ifp, 0);
3224
3225
3226         if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3227             IFCAP_TSO4, 0);
3228         /*
3229          * Tell the upper layer(s) we
3230          * support full VLAN capability
3231          */
3232         if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3233         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3234             IFCAP_VLAN_MTU, 0);
3235         if_setcapenable(ifp, if_getcapabilities(ifp));
3236
3237         /*
3238         ** Don't turn this on by default: if vlans are
3239         ** created on another pseudo device (e.g. lagg)
3240         ** then vlan events are not passed through, breaking
3241         ** operation, but with HW FILTER off it works. If
3242         ** you use vlans directly on the em driver you can
3243         ** enable this and get full hardware tag filtering.
3244         */
3245         if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
3246
3247 #ifdef DEVICE_POLLING
3248         if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3249 #endif
3250
3251         /* Enable only WOL MAGIC by default */
3252         if (adapter->wol) {
3253                 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3254                 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3255         }
3256                 
3257         /*
3258          * Specify the media types supported by this adapter and register
3259          * callbacks to update media and link information
3260          */
3261         ifmedia_init(&adapter->media, IFM_IMASK,
3262             em_media_change, em_media_status);
3263         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3264             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3265                 u_char fiber_type = IFM_1000_SX;        /* default type */
3266
3267                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3268                             0, NULL);
3269                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3270         } else {
3271                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3272                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3273                             0, NULL);
3274                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3275                             0, NULL);
3276                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3277                             0, NULL);
3278                 if (adapter->hw.phy.type != e1000_phy_ife) {
3279                         ifmedia_add(&adapter->media,
3280                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3281                         ifmedia_add(&adapter->media,
3282                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3283                 }
3284         }
3285         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3286         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3287         return (0);
3288 }
3289
3290
3291 /*
3292  * Manage DMA'able memory.
3293  */
3294 static void
3295 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3296 {
3297         if (error)
3298                 return;
3299         *(bus_addr_t *) arg = segs[0].ds_addr;
3300 }
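
/*
** Note: on error the callback leaves *arg untouched, which is why
** em_dma_malloc() below zeroes dma_paddr before bus_dmamap_load() and
** treats a still-zero address as a failed load.
*/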
3301
3302 static int
3303 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3304         struct em_dma_alloc *dma, int mapflags)
3305 {
3306         int error;
3307
3308         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3309                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3310                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3311                                 BUS_SPACE_MAXADDR,      /* highaddr */
3312                                 NULL, NULL,             /* filter, filterarg */
3313                                 size,                   /* maxsize */
3314                                 1,                      /* nsegments */
3315                                 size,                   /* maxsegsize */
3316                                 0,                      /* flags */
3317                                 NULL,                   /* lockfunc */
3318                                 NULL,                   /* lockarg */
3319                                 &dma->dma_tag);
3320         if (error) {
3321                 device_printf(adapter->dev,
3322                     "%s: bus_dma_tag_create failed: %d\n",
3323                     __func__, error);
3324                 goto fail_0;
3325         }
3326
3327         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3328             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3329         if (error) {
3330                 device_printf(adapter->dev,
3331                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3332                     __func__, (uintmax_t)size, error);
3333                 goto fail_2;
3334         }
3335
3336         dma->dma_paddr = 0;
3337         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3338             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3339         if (error || dma->dma_paddr == 0) {
3340                 device_printf(adapter->dev,
3341                     "%s: bus_dmamap_load failed: %d\n",
3342                     __func__, error);
3343                 goto fail_3;
3344         }
3345
3346         return (0);
3347
3348 fail_3:
3349         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3350 fail_2:
3351         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3352         bus_dma_tag_destroy(dma->dma_tag);
3353 fail_0:
3354         dma->dma_tag = NULL;
3355
3356         return (error);
3357 }
3358
3359 static void
3360 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3361 {
3362         if (dma->dma_tag == NULL)
3363                 return;
3364         if (dma->dma_paddr != 0) {
3365                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3366                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3367                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3368                 dma->dma_paddr = 0;
3369         }
3370         if (dma->dma_vaddr != NULL) {
3371                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3372                 dma->dma_vaddr = NULL;
3373         }
3374         bus_dma_tag_destroy(dma->dma_tag);
3375         dma->dma_tag = NULL;
3376 }
3377
3378
3379 /*********************************************************************
3380  *
3381  *  Allocate memory for the transmit and receive rings, and then
3382  *  the descriptors associated with each, called only once at attach.
3383  *
3384  **********************************************************************/
3385 static int
3386 em_allocate_queues(struct adapter *adapter)
3387 {
3388         device_t                dev = adapter->dev;
3389         struct tx_ring          *txr = NULL;
3390         struct rx_ring          *rxr = NULL;
3391         int rsize, tsize, error = E1000_SUCCESS;
3392         int txconf = 0, rxconf = 0;
3393
3394
3395         /* Allocate the TX ring struct memory */
3396         if (!(adapter->tx_rings =
3397             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3398             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3399                 device_printf(dev, "Unable to allocate TX ring memory\n");
3400                 error = ENOMEM;
3401                 goto fail;
3402         }
3403
3404         /* Now allocate the RX */
3405         if (!(adapter->rx_rings =
3406             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3407             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3408                 device_printf(dev, "Unable to allocate RX ring memory\n");
3409                 error = ENOMEM;
3410                 goto rx_fail;
3411         }
3412
3413         tsize = roundup2(adapter->num_tx_desc *
3414             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3415         /*
3416          * Now set up the TX queues; txconf is needed to handle the
3417          * possibility that things fail midcourse and we need to
3418          * unwind the allocations gracefully
3419          */
3420         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3421                 /* Set up some basics */
3422                 txr = &adapter->tx_rings[i];
3423                 txr->adapter = adapter;
3424                 txr->me = i;
3425
3426                 /* Initialize the TX lock */
3427                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3428                     device_get_nameunit(dev), txr->me);
3429                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3430
3431                 if (em_dma_malloc(adapter, tsize,
3432                         &txr->txdma, BUS_DMA_NOWAIT)) {
3433                         device_printf(dev,
3434                             "Unable to allocate TX Descriptor memory\n");
3435                         error = ENOMEM;
3436                         goto err_tx_desc;
3437                 }
3438                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3439                 bzero((void *)txr->tx_base, tsize);
3440
3441                 if (em_allocate_transmit_buffers(txr)) {
3442                         device_printf(dev,
3443                             "Critical Failure setting up transmit buffers\n");
3444                         error = ENOMEM;
3445                         goto err_tx_desc;
3446                 }
3447 #if __FreeBSD_version >= 800000
3448                 /* Allocate a buf ring */
3449                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3450                     M_WAITOK, &txr->tx_mtx);
3451 #endif
3452         }
3453
3454         /*
3455          * Next the RX queues...
3456          */ 
3457         rsize = roundup2(adapter->num_rx_desc *
3458             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3459         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3460                 rxr = &adapter->rx_rings[i];
3461                 rxr->adapter = adapter;
3462                 rxr->me = i;
3463
3464                 /* Initialize the RX lock */
3465                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3466                     device_get_nameunit(dev), rxr->me);
3467                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3468
3469                 if (em_dma_malloc(adapter, rsize,
3470                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3471                         device_printf(dev,
3472                             "Unable to allocate RX Descriptor memory\n");
3473                         error = ENOMEM;
3474                         goto err_rx_desc;
3475                 }
3476                 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3477                 bzero((void *)rxr->rx_base, rsize);
3478
3479                 /* Allocate receive buffers for the ring */
3480                 if (em_allocate_receive_buffers(rxr)) {
3481                         device_printf(dev,
3482                             "Critical Failure setting up receive buffers\n");
3483                         error = ENOMEM;
3484                         goto err_rx_desc;
3485                 }
3486         }
3487
3488         return (0);
3489
3490 err_rx_desc:
3491         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3492                 em_dma_free(adapter, &rxr->rxdma);
3493 err_tx_desc:
3494         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3495                 em_dma_free(adapter, &txr->txdma);
3496         free(adapter->rx_rings, M_DEVBUF);
3497 rx_fail:
3498 #if __FreeBSD_version >= 800000
3499         if (txr != NULL && txr->br != NULL)
                 buf_ring_free(txr->br, M_DEVBUF);
3500 #endif
3501         free(adapter->tx_rings, M_DEVBUF);
3502 fail:
3503         return (error);
3504 }
3505
3506
3507 /*********************************************************************
3508  *
3509  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3510  *  the information needed to transmit a packet on the wire. This is
3511  *  called only once at attach, setup is done every reset.
3512  *
3513  **********************************************************************/
3514 static int
3515 em_allocate_transmit_buffers(struct tx_ring *txr)
3516 {
3517         struct adapter *adapter = txr->adapter;
3518         device_t dev = adapter->dev;
3519         struct em_txbuffer *txbuf;
3520         int error, i;
3521
3522         /*
3523          * Setup DMA descriptor areas.
3524          */
3525         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3526                                1, 0,                    /* alignment, bounds */
3527                                BUS_SPACE_MAXADDR,       /* lowaddr */
3528                                BUS_SPACE_MAXADDR,       /* highaddr */
3529                                NULL, NULL,              /* filter, filterarg */
3530                                EM_TSO_SIZE,             /* maxsize */
3531                                EM_MAX_SCATTER,          /* nsegments */
3532                                PAGE_SIZE,               /* maxsegsize */
3533                                0,                       /* flags */
3534                                NULL,                    /* lockfunc */
3535                                NULL,                    /* lockfuncarg */
3536                                &txr->txtag))) {
3537                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3538                 goto fail;
3539         }
3540
3541         if (!(txr->tx_buffers =
3542             (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3543             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3544                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3545                 error = ENOMEM;
3546                 goto fail;
3547         }
3548
3549         /* Create the descriptor buffer dma maps */
3550         txbuf = txr->tx_buffers;
3551         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3552                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3553                 if (error != 0) {
3554                         device_printf(dev, "Unable to create TX DMA map\n");
3555                         goto fail;
3556                 }
3557         }
3558
3559         return 0;
3560 fail:
3561         /* We free everything; this handles the case where we failed midway */
3562         em_free_transmit_structures(adapter);
3563         return (error);
3564 }
3565
3566 /*********************************************************************
3567  *
3568  *  Initialize a transmit ring.
3569  *
3570  **********************************************************************/
3571 static void
3572 em_setup_transmit_ring(struct tx_ring *txr)
3573 {
3574         struct adapter *adapter = txr->adapter;
3575         struct em_txbuffer *txbuf;
3576         int i;
3577 #ifdef DEV_NETMAP
3578         struct netmap_slot *slot;
3579         struct netmap_adapter *na = netmap_getna(adapter->ifp);
3580 #endif /* DEV_NETMAP */
3581
3582         /* Clear the old descriptor contents */
3583         EM_TX_LOCK(txr);
3584 #ifdef DEV_NETMAP
3585         slot = netmap_reset(na, NR_TX, txr->me, 0);
3586 #endif /* DEV_NETMAP */
3587
3588         bzero((void *)txr->tx_base,
3589               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3590         /* Reset indices */
3591         txr->next_avail_desc = 0;
3592         txr->next_to_clean = 0;
3593
3594         /* Free any existing tx buffers. */
3595         txbuf = txr->tx_buffers;
3596         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3597                 if (txbuf->m_head != NULL) {
3598                         bus_dmamap_sync(txr->txtag, txbuf->map,
3599                             BUS_DMASYNC_POSTWRITE);
3600                         bus_dmamap_unload(txr->txtag, txbuf->map);
3601                         m_freem(txbuf->m_head);
3602                         txbuf->m_head = NULL;
3603                 }
3604 #ifdef DEV_NETMAP
3605                 if (slot) {
3606                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3607                         uint64_t paddr;
3608                         void *addr;
3609
3610                         addr = PNMB(na, slot + si, &paddr);
3611                         txr->tx_base[i].buffer_addr = htole64(paddr);
3612                         /* reload the map for netmap mode */
3613                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3614                 }
3615 #endif /* DEV_NETMAP */
3616
3617                 /* clear the watch index */
3618                 txbuf->next_eop = -1;
3619         }
3620
3621         /* Set number of descriptors available */
3622         txr->tx_avail = adapter->num_tx_desc;
3623         txr->busy = EM_TX_IDLE;
3624
3625         /* Clear checksum offload context. */
3626         txr->last_hw_offload = 0;
3627         txr->last_hw_ipcss = 0;
3628         txr->last_hw_ipcso = 0;
3629         txr->last_hw_tucss = 0;
3630         txr->last_hw_tucso = 0;
3631
3632         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3633             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3634         EM_TX_UNLOCK(txr);
3635 }
3636
3637 /*********************************************************************
3638  *
3639  *  Initialize all transmit rings.
3640  *
3641  **********************************************************************/
3642 static void
3643 em_setup_transmit_structures(struct adapter *adapter)
3644 {
3645         struct tx_ring *txr = adapter->tx_rings;
3646
3647         for (int i = 0; i < adapter->num_queues; i++, txr++)
3648                 em_setup_transmit_ring(txr);
3649
3650         return;
3651 }
3652
3653 /*********************************************************************
3654  *
3655  *  Enable transmit unit.
3656  *
3657  **********************************************************************/
3658 static void
3659 em_initialize_transmit_unit(struct adapter *adapter)
3660 {
3661         struct tx_ring  *txr = adapter->tx_rings;
3662         struct e1000_hw *hw = &adapter->hw;
3663         u32     tctl, txdctl = 0, tarc, tipg = 0;
3664
3665         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3666
3667         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3668                 u64 bus_addr = txr->txdma.dma_paddr;
3669                 /* Base and Len of TX Ring */
3670                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3671                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3672                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3673                     (u32)(bus_addr >> 32));
3674                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3675                     (u32)bus_addr);
3676                 /* Init the HEAD/TAIL indices */
3677                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3678                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3679
3680                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3681                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3682                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3683
3684                 txr->busy = EM_TX_IDLE;
3685                 txdctl = 0; /* clear txdctl */
3686                 txdctl |= 0x1f; /* PTHRESH */
3687                 txdctl |= 1 << 8; /* HTHRESH */
3688                 txdctl |= 1 << 16; /* WTHRESH */
3689                 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3690                 txdctl |= E1000_TXDCTL_GRAN;
3691                 txdctl |= 1 << 25; /* LWTHRESH */
3692
3693                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3694         }
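        /*
        ** Illustrative note: assuming E1000_TXDCTL_GRAN is bit 24
        ** (descriptor granularity), the per-queue value written above
        ** works out to 0x1f | (1 << 8) | (1 << 16) | (1 << 22) |
        ** (1 << 24) | (1 << 25) = 0x0341011f.
        */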
3695
3696         /* Set the default values for the Tx Inter Packet Gap timer */
3697         switch (adapter->hw.mac.type) {
3698         case e1000_80003es2lan:
3699                 tipg = DEFAULT_82543_TIPG_IPGR1;
3700                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3701                     E1000_TIPG_IPGR2_SHIFT;
3702                 break;
3703         default:
3704                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3705                     (adapter->hw.phy.media_type ==
3706                     e1000_media_type_internal_serdes))
3707                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3708                 else
3709                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3710                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3711                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3712         }
3713
3714         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3715         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3716
3717         if (adapter->hw.mac.type >= e1000_82540)
3718                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3719                     adapter->tx_abs_int_delay.value);
3720
3721         if ((adapter->hw.mac.type == e1000_82571) ||
3722             (adapter->hw.mac.type == e1000_82572)) {
3723                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3724                 tarc |= TARC_SPEED_MODE_BIT;
3725                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3726         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3727                 /* errata: program both queues to unweighted RR */
3728                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3729                 tarc |= 1;
3730                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3731                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3732                 tarc |= 1;
3733                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3734         } else if (adapter->hw.mac.type == e1000_82574) {
3735                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3736                 tarc |= TARC_ERRATA_BIT;
3737                 if (adapter->num_queues > 1) {
3738                         tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3739                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3740                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3741                 } else
3742                         E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3743         }
3744
3745         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3746         if (adapter->tx_int_delay.value > 0)
3747                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3748
3749         /* Program the Transmit Control Register */
3750         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3751         tctl &= ~E1000_TCTL_CT;
3752         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3753                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3754
3755         if (adapter->hw.mac.type >= e1000_82571)
3756                 tctl |= E1000_TCTL_MULR;
3757
3758         /* This write will effectively turn on the transmit unit. */
3759         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3760
3761         if (hw->mac.type == e1000_pch_spt) {
3762                 u32 reg;
3763                 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3764                 reg |= E1000_RCTL_RDMTS_HEX;
3765                 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3766                 reg = E1000_READ_REG(hw, E1000_TARC(0));
3767                 reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3768                 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3769         }
3770 }
3771
3772
3773 /*********************************************************************
3774  *
3775  *  Free all transmit rings.
3776  *
3777  **********************************************************************/
3778 static void
3779 em_free_transmit_structures(struct adapter *adapter)
3780 {
3781         struct tx_ring *txr = adapter->tx_rings;
3782
3783         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3784                 EM_TX_LOCK(txr);
3785                 em_free_transmit_buffers(txr);
3786                 em_dma_free(adapter, &txr->txdma);
3787                 EM_TX_UNLOCK(txr);
3788                 EM_TX_LOCK_DESTROY(txr);
3789         }
3790
3791         free(adapter->tx_rings, M_DEVBUF);
3792 }
3793
3794 /*********************************************************************
3795  *
3796  *  Free transmit ring related data structures.
3797  *
3798  **********************************************************************/
3799 static void
3800 em_free_transmit_buffers(struct tx_ring *txr)
3801 {
3802         struct adapter          *adapter = txr->adapter;
3803         struct em_txbuffer      *txbuf;
3804
3805         INIT_DEBUGOUT("free_transmit_ring: begin");
3806
3807         if (txr->tx_buffers == NULL)
3808                 return;
3809
3810         for (int i = 0; i < adapter->num_tx_desc; i++) {
3811                 txbuf = &txr->tx_buffers[i];
3812                 if (txbuf->m_head != NULL) {
3813                         bus_dmamap_sync(txr->txtag, txbuf->map,
3814                             BUS_DMASYNC_POSTWRITE);
3815                         bus_dmamap_unload(txr->txtag,
3816                             txbuf->map);
3817                         m_freem(txbuf->m_head);
3818                         txbuf->m_head = NULL;
3819                         if (txbuf->map != NULL) {
3820                                 bus_dmamap_destroy(txr->txtag,
3821                                     txbuf->map);
3822                                 txbuf->map = NULL;
3823                         }
3824                 } else if (txbuf->map != NULL) {
3825                         bus_dmamap_unload(txr->txtag,
3826                             txbuf->map);
3827                         bus_dmamap_destroy(txr->txtag,
3828                             txbuf->map);
3829                         txbuf->map = NULL;
3830                 }
3831         }
3832 #if __FreeBSD_version >= 800000
3833         if (txr->br != NULL)
3834                 buf_ring_free(txr->br, M_DEVBUF);
3835 #endif
3836         if (txr->tx_buffers != NULL) {
3837                 free(txr->tx_buffers, M_DEVBUF);
3838                 txr->tx_buffers = NULL;
3839         }
3840         if (txr->txtag != NULL) {
3841                 bus_dma_tag_destroy(txr->txtag);
3842                 txr->txtag = NULL;
3843         }
3844         return;
3845 }
3846
3847
3848 /*********************************************************************
3849  *  The offload context is protocol specific (TCP/UDP) and thus
3850  *  only needs to be set when the protocol changes. The occasion
3851  *  of a context change can be a performance detriment, and
3852  *  might be better just disabled. The reason arises in the way
3853  *  in which the controller supports pipelined requests from the
3854  *  Tx data DMA. Up to four requests can be pipelined, and they may
3855  *  belong to the same packet or to multiple packets. However all
3856  *  requests for one packet are issued before a request is issued
3857  *  for a subsequent packet and if a request for the next packet
3858  *  requires a context change, that request will be stalled
3859  *  until the previous request completes. This means setting up
3860  *  a new context effectively disables pipelined Tx data DMA which
3861  *  in turn greatly slows down performance when sending small
3862  *  frames.
3863  **********************************************************************/
3864 static void
3865 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3866     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3867 {
3868         struct adapter                  *adapter = txr->adapter;
3869         struct e1000_context_desc       *TXD = NULL;
3870         struct em_txbuffer              *tx_buffer;
3871         int                             cur, hdr_len;
3872         u32                             cmd = 0;
3873         u16                             offload = 0;
3874         u8                              ipcso, ipcss, tucso, tucss;
3875
3876         ipcss = ipcso = tucss = tucso = 0;
3877         hdr_len = ip_off + (ip->ip_hl << 2);
3878         cur = txr->next_avail_desc;
3879
3880         /* Setup of IP header checksum. */
3881         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3882                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3883                 offload |= CSUM_IP;
3884                 ipcss = ip_off;
3885                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3886                 /*
3887                  * Start offset for header checksum calculation.
3888                  * End offset for header checksum calculation.
3889                  * Offset of place to put the checksum.
3890                  */
3891                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3892                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3893                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3894                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3895                 cmd |= E1000_TXD_CMD_IP;
3896         }
3897
3898         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3899                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3900                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3901                 offload |= CSUM_TCP;
3902                 tucss = hdr_len;
3903                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3904                 /*
3905                  * The 82574L can only remember the *last* context used
3906                  * regardless of the queue it was used for.  We cannot reuse
3907                  * contexts on this hardware platform and must generate a new
3908                  * context every time.  82574L hardware spec, section 7.2.6,
3909                  * second note.
3910                  */
3911                 if (adapter->num_queues < 2) {
3912                         /*
3913                         * Setting up a new checksum offload context for every
3914                         * frame takes a lot of processing time for the hardware.
3915                         * This also reduces performance a lot for small sized
3916                         * frames, so avoid it if the driver can use a previously
3917                         * configured checksum offload context.
3918                         */
3919                         if (txr->last_hw_offload == offload) {
3920                                 if (offload & CSUM_IP) {
3921                                         if (txr->last_hw_ipcss == ipcss &&
3922                                         txr->last_hw_ipcso == ipcso &&
3923                                         txr->last_hw_tucss == tucss &&
3924                                         txr->last_hw_tucso == tucso)
3925                                                 return;
3926                                 } else {
3927                                         if (txr->last_hw_tucss == tucss &&
3928                                         txr->last_hw_tucso == tucso)
3929                                                 return;
3930                                 }
3931                         }
3932                         txr->last_hw_offload = offload;
3933                         txr->last_hw_tucss = tucss;
3934                         txr->last_hw_tucso = tucso;
3935                 }
3936                 /*
3937                  * Start offset for payload checksum calculation.
3938                  * End offset for payload checksum calculation.
3939                  * Offset of place to put the checksum.
3940                  */
3941                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3942                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3943                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3944                 TXD->upper_setup.tcp_fields.tucso = tucso;
3945                 cmd |= E1000_TXD_CMD_TCP;
3946         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3947                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3948                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3949                 tucss = hdr_len;
3950                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3951                 /*
3952                  * The 82574L can only remember the *last* context used
3953                  * regardless of the queue it was used for.  We cannot reuse
3954                  * contexts on this hardware platform and must generate a new
3955                  * context every time.  82574L hardware spec, section 7.2.6,
3956                  * second note.
3957                  */
3958                 if (adapter->num_queues < 2) {
3959                         /*
3960                         * Setting up a new checksum offload context for every
3961                         * frame takes a lot of processing time for the hardware.
3962                         * This also reduces performance a lot for small sized
3963                         * frames, so avoid it if the driver can use a previously
3964                         * configured checksum offload context.
3965                         */
3966                         if (txr->last_hw_offload == offload) {
3967                                 if (offload & CSUM_IP) {
3968                                         if (txr->last_hw_ipcss == ipcss &&
3969                                         txr->last_hw_ipcso == ipcso &&
3970                                         txr->last_hw_tucss == tucss &&
3971                                         txr->last_hw_tucso == tucso)
3972                                                 return;
3973                                 } else {
3974                                         if (txr->last_hw_tucss == tucss &&
3975                                         txr->last_hw_tucso == tucso)
3976                                                 return;
3977                                 }
3978                         }
3979                         txr->last_hw_offload = offload;
3980                         txr->last_hw_tucss = tucss;
3981                         txr->last_hw_tucso = tucso;
3982                 }
3983                 /*
3984                  * Start offset for header checksum calculation.
3985                  * End offset for header checksum calculation.
3986                  * Offset of place to put the checksum.
3987                  */
3988                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3989                 TXD->upper_setup.tcp_fields.tucss = tucss;
3990                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3991                 TXD->upper_setup.tcp_fields.tucso = tucso;
3992         }
3993   
3994         if (offload & CSUM_IP) {
3995                 txr->last_hw_ipcss = ipcss;
3996                 txr->last_hw_ipcso = ipcso;
3997         }
3998
3999         TXD->tcp_seg_setup.data = htole32(0);
4000         TXD->cmd_and_length =
4001             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4002         tx_buffer = &txr->tx_buffers[cur];
4003         tx_buffer->m_head = NULL;
4004         tx_buffer->next_eop = -1;
4005
4006         if (++cur == adapter->num_tx_desc)
4007                 cur = 0;
4008
4009         txr->tx_avail--;
4010         txr->next_avail_desc = cur;
4011 }
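
/*
** Note: the context descriptor built above consumes one slot in the
** TX ring but carries no packet data, hence the tx_avail decrement
** and the next_eop of -1 marking it as not being a packet.
*/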
4012
4013
4014 /**********************************************************************
4015  *
4016  *  Setup work for hardware segmentation offload (TSO)
4017  *
4018  **********************************************************************/
4019 static void
4020 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4021     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4022 {
4023         struct adapter                  *adapter = txr->adapter;
4024         struct e1000_context_desc       *TXD;
4025         struct em_txbuffer              *tx_buffer;
4026         int cur, hdr_len;
4027
4028         /*
4029          * In theory we can use the same TSO context if and only if
4030          * the frame is the same type (IP/TCP) and has the same MSS. However,
4031          * checking whether a frame has the same IP/TCP structure is a
4032          * hard thing, so just ignore that and always set up a
4033          * new TSO context.
4034          */
4035         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4036         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
4037                       E1000_TXD_DTYP_D |        /* Data descr type */
4038                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
4039
4040         /* IP and/or TCP header checksum calculation and insertion. */
4041         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4042
4043         cur = txr->next_avail_desc;
4044         tx_buffer = &txr->tx_buffers[cur];
4045         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4046
4047         /*
4048          * Start offset for header checksum calculation.
4049          * End offset for header checksum calculation.
4050          * Offset of place to put the checksum.
4051          */
4052         TXD->lower_setup.ip_fields.ipcss = ip_off;
4053         TXD->lower_setup.ip_fields.ipcse =
4054             htole16(ip_off + (ip->ip_hl << 2) - 1);
4055         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4056         /*
4057          * Start offset for payload checksum calculation.
4058          * End offset for payload checksum calculation.
4059          * Offset of place to put the checksum.
4060          */
4061         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4062         TXD->upper_setup.tcp_fields.tucse = 0;
4063         TXD->upper_setup.tcp_fields.tucso =
4064             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4065         /*
4066          * Payload size per packet w/o any headers.
4067          * Length of all headers up to payload.
4068          */
4069         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4070         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4071
4072         TXD->cmd_and_length = htole32(adapter->txd_cmd |
4073                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
4074                                 E1000_TXD_CMD_TSE |     /* TSE context */
4075                                 E1000_TXD_CMD_IP |      /* Do IP csum */
4076                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
4077                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4078
4079         tx_buffer->m_head = NULL;
4080         tx_buffer->next_eop = -1;
4081
4082         if (++cur == adapter->num_tx_desc)
4083                 cur = 0;
4084
4085         txr->tx_avail--;
4086         txr->next_avail_desc = cur;
4087         txr->tx_tso = TRUE;
4088 }
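/*
 * Worked example (illustrative): for an untagged Ethernet + IPv4 + TCP
 * frame with no IP or TCP options, ip_off = 14, ip->ip_hl = 5 and
 * tp->th_off = 5, so the fields above become:
 *
 *      ipcss = 14                      IP header follows the 14-byte
 *                                      Ethernet header
 *      ipcse = 14 + 20 - 1 = 33        last byte of the IP header
 *      ipcso = 14 + 10 = 24            ip_sum is at offset 10 in struct ip
 *      tucss = 14 + 20 = 34            start of the TCP header
 *      tucso = 34 + 16 = 50            th_sum is at offset 16 in
 *                                      struct tcphdr
 *      hdr_len = 14 + 20 + 20 = 54     total header bytes
 *
 * The length field of the context descriptor then carries
 * m_pkthdr.len - 54, the TCP payload the hardware will segment into
 * tso_segsz-byte chunks.
 */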
4089
4090
4091 /**********************************************************************
4092  *
4093  *  Examine each tx_buffer in the used queue. If the hardware is done
4094  *  processing the packet then free associated resources. The
4095  *  tx_buffer is put back on the free queue.
4096  *
4097  **********************************************************************/
4098 static void
4099 em_txeof(struct tx_ring *txr)
4100 {
4101         struct adapter  *adapter = txr->adapter;
4102         int first, last, done, processed;
4103         struct em_txbuffer *tx_buffer;
4104         struct e1000_tx_desc   *tx_desc, *eop_desc;
4105         if_t ifp = adapter->ifp;
4106
4107         EM_TX_LOCK_ASSERT(txr);
4108 #ifdef DEV_NETMAP
4109         if (netmap_tx_irq(ifp, txr->me))
4110                 return;
4111 #endif /* DEV_NETMAP */
4112
4113         /* No work, make sure hang detection is disabled */
4114         if (txr->tx_avail == adapter->num_tx_desc) {
4115                 txr->busy = EM_TX_IDLE;
4116                 return;
4117         }
4118
4119         processed = 0;
4120         first = txr->next_to_clean;
4121         tx_desc = &txr->tx_base[first];
4122         tx_buffer = &txr->tx_buffers[first];
4123         last = tx_buffer->next_eop;
4124         eop_desc = &txr->tx_base[last];
4125
4126         /*
4127          * Get the index of the first descriptor
4128          * AFTER the EOP of the first packet, so
4129          * that the inner while loop can use a
4130          * simple inequality comparison.
4131          */
4132         if (++last == adapter->num_tx_desc)
4133                 last = 0;
4134         done = last;
4135
4136         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4137             BUS_DMASYNC_POSTREAD);
4138
4139         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4140                 /* We clean the range of the packet */
4141                 while (first != done) {
4142                         tx_desc->upper.data = 0;
4143                         tx_desc->lower.data = 0;
4144                         tx_desc->buffer_addr = 0;
4145                         ++txr->tx_avail;
4146                         ++processed;
4147
4148                         if (tx_buffer->m_head) {
4149                                 bus_dmamap_sync(txr->txtag,
4150                                     tx_buffer->map,
4151                                     BUS_DMASYNC_POSTWRITE);
4152                                 bus_dmamap_unload(txr->txtag,
4153                                     tx_buffer->map);
4154                                 m_freem(tx_buffer->m_head);
4155                                 tx_buffer->m_head = NULL;
4156                         }
4157                         tx_buffer->next_eop = -1;
4158
4159                         if (++first == adapter->num_tx_desc)
4160                                 first = 0;
4161
4162                         tx_buffer = &txr->tx_buffers[first];
4163                         tx_desc = &txr->tx_base[first];
4164                 }
4165                 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4166                 /* See if we can continue to the next packet */
4167                 last = tx_buffer->next_eop;
4168                 if (last != -1) {
4169                         eop_desc = &txr->tx_base[last];
4170                         /* Get new done point */
4171                         if (++last == adapter->num_tx_desc) last = 0;
4172                         done = last;
4173                 } else
4174                         break;
4175         }
4176         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4177             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4178
4179         txr->next_to_clean = first;
4180
4181         /*
4182         ** Hang detection: we know there is work outstanding,
4183         ** or the early return above would have been taken, so
4184         ** cleaning no descriptors here indicates a potential hang.
4185         ** The local timer will examine this and reset if needed.
4186         */
4187         if (processed == 0) {
4188                 if (txr->busy != EM_TX_HUNG)
4189                         ++txr->busy;
4190         } else /* At least one descriptor was cleaned */
4191                 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4192
4193         /*
4194          * If we have a minimum free, clear IFF_DRV_OACTIVE
4195          * to tell the stack that it is OK to send packets.
4196          * Notice that all writes of OACTIVE happen under the
4197          * TX lock which, with a single queue, guarantees 
4198          * sanity.
4199          */
4200         if (txr->tx_avail >= EM_MAX_SCATTER) {
4201                 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4202         }
4203
4204         /* Disable hang detection if all clean */
4205         if (txr->tx_avail == adapter->num_tx_desc)
4206                 txr->busy = EM_TX_IDLE;
4207 }
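/*
 * Illustrative example of the "done" index used above: with
 * num_tx_desc = 256, a packet occupying descriptors 250..255 has
 * next_eop = 255, so "done" wraps to 0 and the inner loop cleans
 * 250, 251, ..., 255, stopping once "first" reaches 0.  Advancing one
 * slot past the EOP lets the loop use a plain inequality rather than
 * a wrap-aware comparison.
 */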
4208
4209 /*********************************************************************
4210  *
4211  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4212  *
4213  **********************************************************************/
4214 static void
4215 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4216 {
4217         struct adapter          *adapter = rxr->adapter;
4218         struct mbuf             *m;
4219         bus_dma_segment_t       segs;
4220         struct em_rxbuffer      *rxbuf;
4221         int                     i, j, error, nsegs;
4222         bool                    cleaned = FALSE;
4223
4224         i = j = rxr->next_to_refresh;
4225         /*
4226         ** Get one descriptor beyond
4227         ** our work mark to control
4228         ** the loop.
4229         */
4230         if (++j == adapter->num_rx_desc)
4231                 j = 0;
4232
4233         while (j != limit) {
4234                 rxbuf = &rxr->rx_buffers[i];
4235                 if (rxbuf->m_head == NULL) {
4236                         m = m_getjcl(M_NOWAIT, MT_DATA,
4237                             M_PKTHDR, adapter->rx_mbuf_sz);
4238                         /*
4239                         ** If we have a temporary resource shortage
4240                         ** that causes a failure, just abort the refresh
4241                         ** for now; we will return to this point when
4242                         ** reinvoked from em_rxeof.
4243                         */
4244                         if (m == NULL)
4245                                 goto update;
4246                 } else
4247                         m = rxbuf->m_head;
4248
4249                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4250                 m->m_flags |= M_PKTHDR;
4251                 m->m_data = m->m_ext.ext_buf;
4252
4253                 /* Use bus_dma machinery to setup the memory mapping  */
4254                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4255                     m, &segs, &nsegs, BUS_DMA_NOWAIT);
4256                 if (error != 0) {
4257                         printf("Refresh mbufs: hdr dmamap load"
4258                             " failure - %d\n", error);
4259                         m_free(m);
4260                         rxbuf->m_head = NULL;
4261                         goto update;
4262                 }
4263                 rxbuf->m_head = m;
4264                 rxbuf->paddr = segs.ds_addr;
4265                 bus_dmamap_sync(rxr->rxtag,
4266                     rxbuf->map, BUS_DMASYNC_PREREAD);
4267                 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4268                 cleaned = TRUE;
4269
4270                 i = j; /* Next is precalculated for us */
4271                 rxr->next_to_refresh = i;
4272                 /* Calculate next controlling index */
4273                 if (++j == adapter->num_rx_desc)
4274                         j = 0;
4275         }
4276 update:
4277         /*
4278         ** Update the tail pointer only if, and
4279         ** only as far as, we have refreshed.
4280         */
4281         if (cleaned)
4282                 E1000_WRITE_REG(&adapter->hw,
4283                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4284
4285         return;
4286 }
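/*
 * Note on the RDT write above: the hardware owns the descriptors between
 * the head (RDH) and tail (RDT) pointers, so writing
 * RDT = next_to_refresh hands exactly the refreshed descriptors back to
 * the NIC while those past the tail remain the driver's.  Skipping the
 * write when nothing was refreshed avoids a pointless register access.
 */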
4287
4288
4289 /*********************************************************************
4290  *
4291  *  Allocate memory for rx_buffer structures. Since we use one
4292  *  rx_buffer per received packet, the maximum number of rx_buffer's
4293  *  that we'll need is equal to the number of receive descriptors
4294  *  that we've allocated.
4295  *
4296  **********************************************************************/
4297 static int
4298 em_allocate_receive_buffers(struct rx_ring *rxr)
4299 {
4300         struct adapter          *adapter = rxr->adapter;
4301         device_t                dev = adapter->dev;
4302         struct em_rxbuffer      *rxbuf;
4303         int                     error;
4304
4305         rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4306             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4307         if (rxr->rx_buffers == NULL) {
4308                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4309                 return (ENOMEM);
4310         }
4311
4312         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4313                                 1, 0,                   /* alignment, bounds */
4314                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4315                                 BUS_SPACE_MAXADDR,      /* highaddr */
4316                                 NULL, NULL,             /* filter, filterarg */
4317                                 MJUM9BYTES,             /* maxsize */
4318                                 1,                      /* nsegments */
4319                                 MJUM9BYTES,             /* maxsegsize */
4320                                 0,                      /* flags */
4321                                 NULL,                   /* lockfunc */
4322                                 NULL,                   /* lockarg */
4323                                 &rxr->rxtag);
4324         if (error) {
4325                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4326                     __func__, error);
4327                 goto fail;
4328         }
4329
4330         /* Create a DMA map for each receive buffer */
4331         for (int i = 0; i < adapter->num_rx_desc; i++) {
4332                 rxbuf = &rxr->rx_buffers[i];
4333                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4334                 if (error) {
4335                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4336                             __func__, error);
4337                         goto fail;
4338                 }
4339         }
4340
4341         return (0);
4342
4343 fail:
4344         em_free_receive_structures(adapter);
4345         return (error);
4346 }
4347
4348
4349 /*********************************************************************
4350  *
4351  *  Initialize a receive ring and its buffers.
4352  *
4353  **********************************************************************/
4354 static int
4355 em_setup_receive_ring(struct rx_ring *rxr)
4356 {
4357         struct  adapter         *adapter = rxr->adapter;
4358         struct em_rxbuffer      *rxbuf;
4359         bus_dma_segment_t       seg[1];
4360         int                     rsize, nsegs, error = 0;
4361 #ifdef DEV_NETMAP
4362         struct netmap_slot *slot;
4363         struct netmap_adapter *na = netmap_getna(adapter->ifp);
4364 #endif
4365
4366
4367         /* Clear the ring contents */
4368         EM_RX_LOCK(rxr);
4369         rsize = roundup2(adapter->num_rx_desc *
4370             sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4371         bzero((void *)rxr->rx_base, rsize);
4372 #ifdef DEV_NETMAP
4373         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4374 #endif
4375
4376         /*
4377         ** Free current RX buffer structs and their mbufs
4378         */
4379         for (int i = 0; i < adapter->num_rx_desc; i++) {
4380                 rxbuf = &rxr->rx_buffers[i];
4381                 if (rxbuf->m_head != NULL) {
4382                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4383                             BUS_DMASYNC_POSTREAD);
4384                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4385                         m_freem(rxbuf->m_head);
4386                         rxbuf->m_head = NULL; /* mark as freed */
4387                 }
4388         }
4389
4390         /* Now replenish the mbufs */
4391         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4392                 rxbuf = &rxr->rx_buffers[j];
4393 #ifdef DEV_NETMAP
4394                 if (slot) {
4395                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4396                         uint64_t paddr;
4397                         void *addr;
4398
4399                         addr = PNMB(na, slot + si, &paddr);
4400                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4401                         rxbuf->paddr = paddr;
4402                         em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4403                         continue;
4404                 }
4405 #endif /* DEV_NETMAP */
4406                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4407                     M_PKTHDR, adapter->rx_mbuf_sz);
4408                 if (rxbuf->m_head == NULL) {
4409                         error = ENOBUFS;
4410                         goto fail;
4411                 }
4412                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4413                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4414                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4415
4416                 /* Get the memory mapping */
4417                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4418                     rxbuf->map, rxbuf->m_head, seg,
4419                     &nsegs, BUS_DMA_NOWAIT);
4420                 if (error != 0) {
4421                         m_freem(rxbuf->m_head);
4422                         rxbuf->m_head = NULL;
4423                         goto fail;
4424                 }
4425                 bus_dmamap_sync(rxr->rxtag,
4426                     rxbuf->map, BUS_DMASYNC_PREREAD);
4427
4428                 rxbuf->paddr = seg[0].ds_addr;
4429                 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4430         }
4431         rxr->next_to_check = 0;
4432         rxr->next_to_refresh = 0;
4433         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4434             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4435
4436 fail:
4437         EM_RX_UNLOCK(rxr);
4438         return (error);
4439 }
4440
4441 /*********************************************************************
4442  *
4443  *  Initialize all receive rings.
4444  *
4445  **********************************************************************/
4446 static int
4447 em_setup_receive_structures(struct adapter *adapter)
4448 {
4449         struct rx_ring *rxr = adapter->rx_rings;
4450         int q;
4451
4452         for (q = 0; q < adapter->num_queues; q++, rxr++)
4453                 if (em_setup_receive_ring(rxr))
4454                         goto fail;
4455
4456         return (0);
4457 fail:
4458         /*
4459          * Free the RX buffers allocated so far; we only handle the
4460          * rings that completed, since the failing ring will have
4461          * cleaned up after itself. 'q' failed, so it is the terminus.
4462          */
4463         for (int i = 0; i < q; ++i) {
4464                 rxr = &adapter->rx_rings[i];
4465                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4466                         struct em_rxbuffer *rxbuf;
4467                         rxbuf = &rxr->rx_buffers[n];
4468                         if (rxbuf->m_head != NULL) {
4469                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4470                                   BUS_DMASYNC_POSTREAD);
4471                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4472                                 m_freem(rxbuf->m_head);
4473                                 rxbuf->m_head = NULL;
4474                         }
4475                 }
4476                 rxr->next_to_check = 0;
4477                 rxr->next_to_refresh = 0;
4478         }
4479
4480         return (ENOBUFS);
4481 }
4482
4483 /*********************************************************************
4484  *
4485  *  Free all receive rings.
4486  *
4487  **********************************************************************/
4488 static void
4489 em_free_receive_structures(struct adapter *adapter)
4490 {
4491         struct rx_ring *rxr = adapter->rx_rings;
4492
4493         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4494                 em_free_receive_buffers(rxr);
4495                 /* Free the ring memory as well */
4496                 em_dma_free(adapter, &rxr->rxdma);
4497                 EM_RX_LOCK_DESTROY(rxr);
4498         }
4499
4500         free(adapter->rx_rings, M_DEVBUF);
4501 }
4502
4503
4504 /*********************************************************************
4505  *
4506  *  Free receive ring data structures
4507  *
4508  **********************************************************************/
4509 static void
4510 em_free_receive_buffers(struct rx_ring *rxr)
4511 {
4512         struct adapter          *adapter = rxr->adapter;
4513         struct em_rxbuffer      *rxbuf = NULL;
4514
4515         INIT_DEBUGOUT("free_receive_buffers: begin");
4516
4517         if (rxr->rx_buffers != NULL) {
4518                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4519                         rxbuf = &rxr->rx_buffers[i];
4520                         if (rxbuf->map != NULL) {
4521                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4522                                     BUS_DMASYNC_POSTREAD);
4523                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4524                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4525                         }
4526                         if (rxbuf->m_head != NULL) {
4527                                 m_freem(rxbuf->m_head);
4528                                 rxbuf->m_head = NULL;
4529                         }
4530                 }
4531                 free(rxr->rx_buffers, M_DEVBUF);
4532                 rxr->rx_buffers = NULL;
4533                 rxr->next_to_check = 0;
4534                 rxr->next_to_refresh = 0;
4535         }
4536
4537         if (rxr->rxtag != NULL) {
4538                 bus_dma_tag_destroy(rxr->rxtag);
4539                 rxr->rxtag = NULL;
4540         }
4541
4542         return;
4543 }
4544
4545
4546 /*********************************************************************
4547  *
4548  *  Enable receive unit.
4549  *
4550  **********************************************************************/
4551
4552 static void
4553 em_initialize_receive_unit(struct adapter *adapter)
4554 {
4555         struct rx_ring *rxr = adapter->rx_rings;
4556         if_t ifp = adapter->ifp;
4557         struct e1000_hw *hw = &adapter->hw;
4558         u32     rctl, rxcsum, rfctl;
4559
4560         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4561
4562         /*
4563          * Make sure receives are disabled while setting
4564          * up the descriptor ring
4565          */
4566         rctl = E1000_READ_REG(hw, E1000_RCTL);
4567         /* Do not disable if ever enabled on this hardware */
4568         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4569                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4570
4571         /* Setup the Receive Control Register */
4572         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4573         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4574             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4575             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4576
4577         /* Do not store bad packets */
4578         rctl &= ~E1000_RCTL_SBP;
4579
4580         /* Enable Long Packet receive */
4581         if (if_getmtu(ifp) > ETHERMTU)
4582                 rctl |= E1000_RCTL_LPE;
4583         else
4584                 rctl &= ~E1000_RCTL_LPE;
4585
4586         /* Strip the CRC */
4587         if (!em_disable_crc_stripping)
4588                 rctl |= E1000_RCTL_SECRC;
4589
4590         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4591             adapter->rx_abs_int_delay.value);
4592
4593         E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4594             adapter->rx_int_delay.value);
4595         /*
4596          * Set the interrupt throttling rate. Value is calculated
4597          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4598          */
4599         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
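        /*
         * Worked example, assuming the MAX_INTS_PER_SEC = 8000 default
         * from if_em.h: DEFAULT_ITR = 10^9 ns / (8000 * 256 ns) = 488,
         * i.e. the ITR register counts in 256 ns units and this caps the
         * device at roughly 8000 interrupts per second.
         */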
4600
4601         /* Use extended rx descriptor formats */
4602         rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4603         rfctl |= E1000_RFCTL_EXTEN;
4604         /*
4605         ** When using MSI-X interrupts we need to throttle
4606         ** using the EITR register (82574 only)
4607         */
4608         if (hw->mac.type == e1000_82574) {
4609                 for (int i = 0; i < 4; i++)
4610                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4611                             DEFAULT_ITR);
4612                 /* Disable accelerated acknowledge */
4613                 rfctl |= E1000_RFCTL_ACK_DIS;
4614         }
4615         E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4616
4617         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4618         if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4619 #ifdef EM_MULTIQUEUE
4620                 rxcsum |= E1000_RXCSUM_TUOFL |
4621                           E1000_RXCSUM_IPOFL |
4622                           E1000_RXCSUM_PCSD;
4623 #else
4624                 rxcsum |= E1000_RXCSUM_TUOFL;
4625 #endif
4626         } else
4627                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4628
4629         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4630
4631 #ifdef EM_MULTIQUEUE
4632 #define RSSKEYLEN 10
4633         if (adapter->num_queues > 1) {
4634                 uint8_t  rss_key[4 * RSSKEYLEN];
4635                 uint32_t reta = 0;
4636                 int i;
4637
4638                 /*
4639                 * Configure RSS key
4640                 */
4641                 arc4rand(rss_key, sizeof(rss_key), 0);
4642                 for (i = 0; i < RSSKEYLEN; ++i) {
4643                         uint32_t rssrk = 0;
4644
4645                         rssrk = EM_RSSRK_VAL(rss_key, i);
4646                         E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4647                 }
4648
4649                 /*
4650                 * Configure RSS redirect table in following fashion:
4651                 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4652                 */
4653                 for (i = 0; i < sizeof(reta); ++i) {
4654                         uint32_t q;
4655
4656                         q = (i % adapter->num_queues) << 7;
4657                         reta |= q << (8 * i);
4658                 }
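                /*
                 * Worked example: with num_queues == 2 the four byte-wide
                 * entries packed above are 0x00, 0x80, 0x00, 0x80 (bit 7
                 * of an entry selects the RX queue on the 82574), so
                 * reta == 0x80008000 and the 32 identical RETA registers
                 * alternate consecutive hash buckets between the queues.
                 */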
4659
4660                 for (i = 0; i < 32; ++i) {
4661                         E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4662                 }
4663
4664                 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
4665                                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4666                                 E1000_MRQC_RSS_FIELD_IPV4 |
4667                                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4668                                 E1000_MRQC_RSS_FIELD_IPV6_EX |
4669                                 E1000_MRQC_RSS_FIELD_IPV6);
4670         }
4671 #endif
4672         /*
4673         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4674         ** long latencies are observed, like Lenovo X60. This
4675         ** change eliminates the problem, but since having positive
4676         ** values in RDTR is a known source of problems on other
4677         ** platforms another solution is being sought.
4678         */
4679         if (hw->mac.type == e1000_82573)
4680                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4681
4682         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4683                 /* Setup the Base and Length of the Rx Descriptor Ring */
4684                 u64 bus_addr = rxr->rxdma.dma_paddr;
4685                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4686
4687                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4688                     adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4689                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4690                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4691                 /* Setup the Head and Tail Descriptor Pointers */
4692                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4693 #ifdef DEV_NETMAP
4694                 /*
4695                  * an init() while a netmap client is active must
4696                  * preserve the rx buffers passed to userspace.
4697                  */
4698                 if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4699                         struct netmap_adapter *na = netmap_getna(adapter->ifp);
4700                         rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4701                 }
4702 #endif /* DEV_NETMAP */
4703                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4704         }
4705
4706         /*
4707          * Set PTHRESH for improved jumbo performance.
4708          * According to section 10.2.5.11 of the Intel 82574 datasheet,
4709          * RXDCTL(1) is written whenever RXDCTL(0) is written.
4710          * Only write to RXDCTL(1) if there is a need for different
4711          * settings.
4712          */
4713         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4714             (adapter->hw.mac.type == e1000_pch2lan) ||
4715             (adapter->hw.mac.type == e1000_ich10lan)) &&
4716             (if_getmtu(ifp) > ETHERMTU)) {
4717                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4718                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4719         } else if (adapter->hw.mac.type == e1000_82574) {
4720                 for (int i = 0; i < adapter->num_queues; i++) {
4721                         u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4722
4723                         rxdctl |= 0x20; /* PTHRESH */
4724                         rxdctl |= 4 << 8; /* HTHRESH */
4725                         rxdctl |= 4 << 16;/* WTHRESH */
4726                         rxdctl |= 1 << 24; /* Switch to granularity */
4727                         E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4728                 }
4729         }
4730                 
4731         if (adapter->hw.mac.type >= e1000_pch2lan) {
4732                 if (if_getmtu(ifp) > ETHERMTU)
4733                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4734                 else
4735                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4736         }
4737
4738         /* Make sure VLAN Filters are off */
4739         rctl &= ~E1000_RCTL_VFE;
4740
4741         if (adapter->rx_mbuf_sz == MCLBYTES)
4742                 rctl |= E1000_RCTL_SZ_2048;
4743         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4744                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4745         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4746                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4747
4748         /* Ensure DTYPE (descriptor type, bits 11:10) is cleared to 00 */
4749         rctl &= ~0x00000C00;
4750         /* Write out the settings */
4751         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4752
4753         return;
4754 }
4755
4756
4757 /*********************************************************************
4758  *
4759  *  This routine executes in interrupt context. It replenishes
4760  *  the mbufs in the descriptor ring and passes data which has
4761  *  been DMA'ed into host memory up to the network stack.
4762  *
4763  *  We loop at most count times if count is > 0, or until done if
4764  *  count < 0.
4765  *  
4766  *  For polling we also return the number of cleaned packets.
4767  *********************************************************************/
4768 static bool
4769 em_rxeof(struct rx_ring *rxr, int count, int *done)
4770 {
4771         struct adapter          *adapter = rxr->adapter;
4772         if_t ifp = adapter->ifp;
4773         struct mbuf             *mp, *sendmp;
4774         u32                     status = 0;
4775         u16                     len;
4776         int                     i, processed, rxdone = 0;
4777         bool                    eop;
4778         union e1000_rx_desc_extended    *cur;
4779
4780         EM_RX_LOCK(rxr);
4781
4782         /* Sync the ring */
4783         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4784             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4785
4786
4787 #ifdef DEV_NETMAP
4788         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4789                 EM_RX_UNLOCK(rxr);
4790                 return (FALSE);
4791         }
4792 #endif /* DEV_NETMAP */
4793
4794         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4795                 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4796                         break;
4797
4798                 cur = &rxr->rx_base[i];
4799                 status = le32toh(cur->wb.upper.status_error);
4800                 mp = sendmp = NULL;
4801
4802                 if ((status & E1000_RXD_STAT_DD) == 0)
4803                         break;
4804
4805                 len = le16toh(cur->wb.upper.length);
4806                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4807
4808                 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4809                     (rxr->discard == TRUE)) {
4810                         adapter->dropped_pkts++;
4811                         ++rxr->rx_discarded;
4812                         if (!eop) /* Catch subsequent segs */
4813                                 rxr->discard = TRUE;
4814                         else
4815                                 rxr->discard = FALSE;
4816                         em_rx_discard(rxr, i);
4817                         goto next_desc;
4818                 }
4819                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4820
4821                 /* Assign correct length to the current fragment */
4822                 mp = rxr->rx_buffers[i].m_head;
4823                 mp->m_len = len;
4824
4825                 /* Trigger for refresh */
4826                 rxr->rx_buffers[i].m_head = NULL;
4827
4828                 /* First segment? */
4829                 if (rxr->fmp == NULL) {
4830                         mp->m_pkthdr.len = len;
4831                         rxr->fmp = rxr->lmp = mp;
4832                 } else {
4833                         /* Chain mbuf's together */
4834                         mp->m_flags &= ~M_PKTHDR;
4835                         rxr->lmp->m_next = mp;
4836                         rxr->lmp = mp;
4837                         rxr->fmp->m_pkthdr.len += len;
4838                 }
4839
4840                 if (eop) {
4841                         --count;
4842                         sendmp = rxr->fmp;
4843                         if_setrcvif(sendmp, ifp);
4844                         if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4845                         em_receive_checksum(status, sendmp);
4846 #ifndef __NO_STRICT_ALIGNMENT
4847                         if (adapter->hw.mac.max_frame_size >
4848                             (MCLBYTES - ETHER_ALIGN) &&
4849                             em_fixup_rx(rxr) != 0)
4850                                 goto skip;
4851 #endif
4852                         if (status & E1000_RXD_STAT_VP) {
4853                                 if_setvtag(sendmp, 
4854                                     le16toh(cur->wb.upper.vlan));
4855                                 sendmp->m_flags |= M_VLANTAG;
4856                         }
4857 #ifndef __NO_STRICT_ALIGNMENT
4858 skip:
4859 #endif
4860                         rxr->fmp = rxr->lmp = NULL;
4861                 }
4862 next_desc:
4863                 /* Sync the ring */
4864                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4865                         BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4866
4867                 /* Zero out the receive descriptor's status. */
4868                 cur->wb.upper.status_error &= htole32(~0xFF);
4869                 ++rxdone;       /* cumulative for POLL */
4870                 ++processed;
4871
4872                 /* Advance our pointers to the next descriptor. */
4873                 if (++i == adapter->num_rx_desc)
4874                         i = 0;
4875
4876                 /* Send to the stack */
4877                 if (sendmp != NULL) {
4878                         rxr->next_to_check = i;
4879                         EM_RX_UNLOCK(rxr);
4880                         if_input(ifp, sendmp);
4881                         EM_RX_LOCK(rxr);
4882                         i = rxr->next_to_check;
4883                 }
4884
4885                 /* Only refresh mbufs every 8 descriptors */
4886                 if (processed == 8) {
4887                         em_refresh_mbufs(rxr, i);
4888                         processed = 0;
4889                 }
4890         }
4891
4892         /* Catch any remaining refresh work */
4893         if (e1000_rx_unrefreshed(rxr))
4894                 em_refresh_mbufs(rxr, i);
4895
4896         rxr->next_to_check = i;
4897         if (done != NULL)
4898                 *done = rxdone;
4899         EM_RX_UNLOCK(rxr);
4900
4901         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4902 }
4903
4904 static __inline void
4905 em_rx_discard(struct rx_ring *rxr, int i)
4906 {
4907         struct em_rxbuffer      *rbuf;
4908
4909         rbuf = &rxr->rx_buffers[i];
4910         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4911
4912         /* Free any previous pieces */
4913         if (rxr->fmp != NULL) {
4914                 rxr->fmp->m_flags |= M_PKTHDR;
4915                 m_freem(rxr->fmp);
4916                 rxr->fmp = NULL;
4917                 rxr->lmp = NULL;
4918         }
4919         /*
4920         ** Free the buffer and allow em_refresh_mbufs()
4921         ** to clean up and recharge it.
4922         */
4923         if (rbuf->m_head) {
4924                 m_free(rbuf->m_head);
4925                 rbuf->m_head = NULL;
4926         }
4927         return;
4928 }
4929
4930 #ifndef __NO_STRICT_ALIGNMENT
4931 /*
4932  * When jumbo frames are enabled we should realign the entire payload on
4933  * architectures with strict alignment. This is a serious design mistake
4934  * of the 8254x, as it nullifies the benefit of DMA: the 8254x only
4935  * allows RX buffer sizes of 2048/4096/8192/16384 bytes, while what we
4936  * really want is 2048 - ETHER_ALIGN, which would align the payload.
4937  * Even without strict alignment restrictions the 8254x performs
4938  * unaligned accesses, which also hurt performance. To avoid copying an
4939  * entire frame just to align it, we allocate a new mbuf, copy the
4940  * Ethernet header into it, and prepend it to the existing mbuf chain.
4941  *
4942  * Be aware that the best performance of the 8254x is achieved only when
4943  * jumbo frames are not used at all on strict-alignment architectures.
4944  */
4945 static int
4946 em_fixup_rx(struct rx_ring *rxr)
4947 {
4948         struct adapter *adapter = rxr->adapter;
4949         struct mbuf *m, *n;
4950         int error;
4951
4952         error = 0;
4953         m = rxr->fmp;
4954         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4955                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4956                 m->m_data += ETHER_HDR_LEN;
4957         } else {
4958                 MGETHDR(n, M_NOWAIT, MT_DATA);
4959                 if (n != NULL) {
4960                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4961                         m->m_data += ETHER_HDR_LEN;
4962                         m->m_len -= ETHER_HDR_LEN;
4963                         n->m_len = ETHER_HDR_LEN;
4964                         M_MOVE_PKTHDR(n, m);
4965                         n->m_next = m;
4966                         rxr->fmp = n;
4967                 } else {
4968                         adapter->dropped_pkts++;
4969                         m_freem(rxr->fmp);
4970                         rxr->fmp = NULL;
4971                         error = ENOMEM;
4972                 }
4973         }
4974
4975         return (error);
4976 }
4977 #endif
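/*
 * Alignment arithmetic behind em_fixup_rx(), for illustration: the frame
 * is DMA'd to the start of the receive cluster, so the IP header lands
 * at offset ETHER_HDR_LEN (14), a 2-byte boundary.  Sliding the frame
 * forward by ETHER_HDR_LEN moves the IP header to offset 28, a 4-byte
 * boundary, satisfying strict-alignment architectures; when the frame is
 * too large to slide within the cluster, the Ethernet header is instead
 * moved into a freshly allocated mbuf prepended to the chain.
 */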
4978
4979 static void
4980 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4981 {
4982         rxd->read.buffer_addr = htole64(rxbuf->paddr);
4983         /* DD bits must be cleared */
4984         rxd->wb.upper.status_error= 0;
4985 }
4986
4987 /*********************************************************************
4988  *
4989  *  Verify that the hardware indicated that the checksum is valid.
4990  *  Inform the stack about the status of checksum so that stack
4991  *  doesn't spend time verifying the checksum.
4992  *
4993  *********************************************************************/
4994 static void
4995 em_receive_checksum(uint32_t status, struct mbuf *mp)
4996 {
4997         mp->m_pkthdr.csum_flags = 0;
4998
4999         /* If the Ignore Checksum bit is set, report nothing */
5000         if (status & E1000_RXD_STAT_IXSM)
5001                 return;
5002
5003         /* If the IP checksum exists and there is no IP Checksum error */
5004         if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5005                 E1000_RXD_STAT_IPCS) {
5006                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5007         }
5008
5009         /* TCP or UDP checksum */
5010         if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5011             E1000_RXD_STAT_TCPCS) {
5012                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5013                 mp->m_pkthdr.csum_data = htons(0xffff);
5014         }
5015         if (status & E1000_RXD_STAT_UDPCS) {
5016                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5017                 mp->m_pkthdr.csum_data = htons(0xffff);
5018         }
5019 }
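/*
 * Example of the resulting flags: for a good TCP/IPv4 packet the status
 * word has IPCS and TCPCS set with IPE/TCPE clear, so the mbuf leaves
 * here with csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR and csum_data = 0xffff, which the
 * inet stack treats as a fully verified checksum and does not verify in
 * software again.
 */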
5020
5021 /*
5022  * This routine is run via a vlan
5023  * config EVENT.
5024  */
5025 static void
5026 em_register_vlan(void *arg, if_t ifp, u16 vtag)
5027 {
5028         struct adapter  *adapter = if_getsoftc(ifp);
5029         u32             index, bit;
5030
5031         if ((void *)adapter != arg)     /* Not our event */
5032                 return;
5033
5034         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5035                 return;
5036
5037         EM_CORE_LOCK(adapter);
5038         index = (vtag >> 5) & 0x7F;
5039         bit = vtag & 0x1F;
5040         adapter->shadow_vfta[index] |= (1 << bit);
5041         ++adapter->num_vlans;
5042         /* Re-init to load the changes */
5043         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5044                 em_init_locked(adapter);
5045         EM_CORE_UNLOCK(adapter);
5046 }
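/*
 * Worked example of the VFTA indexing above: for vtag 1234,
 * index = (1234 >> 5) & 0x7F = 38 and bit = 1234 & 0x1F = 18, so the
 * tag is recorded as bit 18 of shadow_vfta[38]; the 128 32-bit entries
 * together cover all 4096 possible VLAN IDs.
 */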
5047
5048 /*
5049  * This routine is run via a vlan
5050  * unconfig EVENT.
5051  */
5052 static void
5053 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5054 {
5055         struct adapter  *adapter = if_getsoftc(ifp);
5056         u32             index, bit;
5057
5058         if (adapter != arg)
5059                 return;
5060
5061         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5062                 return;
5063
5064         EM_CORE_LOCK(adapter);
5065         index = (vtag >> 5) & 0x7F;
5066         bit = vtag & 0x1F;
5067         adapter->shadow_vfta[index] &= ~(1 << bit);
5068         --adapter->num_vlans;
5069         /* Re-init to load the changes */
5070         if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5071                 em_init_locked(adapter);
5072         EM_CORE_UNLOCK(adapter);
5073 }
5074
5075 static void
5076 em_setup_vlan_hw_support(struct adapter *adapter)
5077 {
5078         struct e1000_hw *hw = &adapter->hw;
5079         u32             reg;
5080
5081         /*
5082         ** We get here through init_locked, meaning
5083         ** a soft reset, which has already cleared
5084         ** the VFTA and other state; if no vlans have
5085         ** been registered, do nothing.
5086         */
5087         if (adapter->num_vlans == 0)
5088                 return;
5089
5090         /*
5091         ** A soft reset zeroes out the VFTA, so
5092         ** we need to repopulate it now.
5093         */
5094         for (int i = 0; i < EM_VFTA_SIZE; i++)
5095                 if (adapter->shadow_vfta[i] != 0)
5096                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5097                             i, adapter->shadow_vfta[i]);
5098
5099         reg = E1000_READ_REG(hw, E1000_CTRL);
5100         reg |= E1000_CTRL_VME;
5101         E1000_WRITE_REG(hw, E1000_CTRL, reg);
5102
5103         /* Enable the Filter Table */
5104         reg = E1000_READ_REG(hw, E1000_RCTL);
5105         reg &= ~E1000_RCTL_CFIEN;
5106         reg |= E1000_RCTL_VFE;
5107         E1000_WRITE_REG(hw, E1000_RCTL, reg);
5108 }
5109
5110 static void
5111 em_enable_intr(struct adapter *adapter)
5112 {
5113         struct e1000_hw *hw = &adapter->hw;
5114         u32 ims_mask = IMS_ENABLE_MASK;
5115
5116         if (hw->mac.type == e1000_82574) {
5117                 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5118                 ims_mask |= adapter->ims;
5119         } 
5120         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5121 }
5122
5123 static void
5124 em_disable_intr(struct adapter *adapter)
5125 {
5126         struct e1000_hw *hw = &adapter->hw;
5127
5128         if (hw->mac.type == e1000_82574)
5129                 E1000_WRITE_REG(hw, EM_EIAC, 0);
5130         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5131 }
5132
5133 /*
5134  * A bit of a misnomer: what this really means is
5135  * to enable OS management of the system, i.e.
5136  * to disable special hardware management features.
5137  */
5138 static void
5139 em_init_manageability(struct adapter *adapter)
5140 {
5141         /* A shared code workaround */
5142 #define E1000_82542_MANC2H E1000_MANC2H
5143         if (adapter->has_manage) {
5144                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5145                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5146
5147                 /* disable hardware interception of ARP */
5148                 manc &= ~(E1000_MANC_ARP_EN);
5149
5150                 /* enable receiving management packets to the host */
5151                 manc |= E1000_MANC_EN_MNG2HOST;
5152 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5153 #define E1000_MNG2HOST_PORT_664 (1 << 6)
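                /*
                 * Ports 623 and 664 are the RMCP and secure-RMCP ports
                 * used by ASF/IPMI-style management traffic; forwarding
                 * them keeps the management controller reachable while
                 * the OS owns the NIC.
                 */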
5154                 manc2h |= E1000_MNG2HOST_PORT_623;
5155                 manc2h |= E1000_MNG2HOST_PORT_664;
5156                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5157                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5158         }
5159 }
5160
5161 /*
5162  * Give control back to hardware management
5163  * controller if there is one.
5164  */
5165 static void
5166 em_release_manageability(struct adapter *adapter)
5167 {
5168         if (adapter->has_manage) {
5169                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5170
5171                 /* re-enable hardware interception of ARP */
5172                 manc |= E1000_MANC_ARP_EN;
5173                 manc &= ~E1000_MANC_EN_MNG2HOST;
5174
5175                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5176         }
5177 }
5178
5179 /*
5180  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5181  * For ASF and Pass Through versions of f/w this means
5182  * that the driver is loaded. For AMT version type f/w
5183  * this means that the network i/f is open.
5184  */
5185 static void
5186 em_get_hw_control(struct adapter *adapter)
5187 {
5188         u32 ctrl_ext, swsm;
5189
5190         if (adapter->hw.mac.type == e1000_82573) {
5191                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5192                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5193                     swsm | E1000_SWSM_DRV_LOAD);
5194                 return;
5195         }
5196         /* else */
5197         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5198         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5199             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5200         return;
5201 }
5202
5203 /*
5204  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5205  * For ASF and Pass Through versions of f/w this means that
5206  * the driver is no longer loaded. For AMT versions of the
5207  * f/w this means that the network i/f is closed.
5208  */
5209 static void
5210 em_release_hw_control(struct adapter *adapter)
5211 {
5212         u32 ctrl_ext, swsm;
5213
5214         if (!adapter->has_manage)
5215                 return;
5216
5217         if (adapter->hw.mac.type == e1000_82573) {
5218                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5219                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5220                     swsm & ~E1000_SWSM_DRV_LOAD);
5221                 return;
5222         }
5223         /* else */
5224         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5225         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5226             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5227         return;
5228 }
5229
5230 static int
5231 em_is_valid_ether_addr(u8 *addr)
5232 {
5233         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5234
5235         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5236                 return (FALSE);
5237         }
5238
5239         return (TRUE);
5240 }
5241
5242 /*
5243 ** Parse the interface capabilities with regard
5244 ** to both system management and wake-on-lan for
5245 ** later use.
5246 */
5247 static void
5248 em_get_wakeup(device_t dev)
5249 {
5250         struct adapter  *adapter = device_get_softc(dev);
5251         u16             eeprom_data = 0, device_id, apme_mask;
5252
5253         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5254         apme_mask = EM_EEPROM_APME;
5255
5256         switch (adapter->hw.mac.type) {
5257         case e1000_82573:
5258         case e1000_82583:
5259                 adapter->has_amt = TRUE;
5260                 /* Falls thru */
5261         case e1000_82571:
5262         case e1000_82572:
5263         case e1000_80003es2lan:
5264                 if (adapter->hw.bus.func == 1) {
5265                         e1000_read_nvm(&adapter->hw,
5266                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5267                         break;
5268                 } else
5269                         e1000_read_nvm(&adapter->hw,
5270                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5271                 break;
5272         case e1000_ich8lan:
5273         case e1000_ich9lan:
5274         case e1000_ich10lan:
5275         case e1000_pchlan:
5276         case e1000_pch2lan:
5277         case e1000_pch_lpt:
5278         case e1000_pch_spt:
5279                 apme_mask = E1000_WUC_APME;
5280                 adapter->has_amt = TRUE;
5281                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5282                 break;
5283         default:
5284                 e1000_read_nvm(&adapter->hw,
5285                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5286                 break;
5287         }
5288         if (eeprom_data & apme_mask)
5289                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5290         /*
5291          * We have the eeprom settings, now apply the special cases
5292          * where the eeprom may be wrong or the board won't support
5293          * wake on lan on a particular port
5294          */
5295         device_id = pci_get_device(dev);
5296         switch (device_id) {
5297         case E1000_DEV_ID_82571EB_FIBER:
5298                 /* Wake events only supported on port A for dual fiber
5299                  * regardless of eeprom setting */
5300                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5301                     E1000_STATUS_FUNC_1)
5302                         adapter->wol = 0;
5303                 break;
5304         case E1000_DEV_ID_82571EB_QUAD_COPPER:
5305         case E1000_DEV_ID_82571EB_QUAD_FIBER:
5306         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5307                 /* if quad port adapter, disable WoL on all but port A */
5308                 if (global_quad_port_a != 0)
5309                         adapter->wol = 0;
5310                 /* Reset for multiple quad port adapters */
5311                 if (++global_quad_port_a == 4)
5312                         global_quad_port_a = 0;
5313                 break;
5314         }
5315         return;
5316 }
5317
5318
5319 /*
5320  * Enable PCI Wake On Lan capability
5321  */
5322 static void
5323 em_enable_wakeup(device_t dev)
5324 {
5325         struct adapter  *adapter = device_get_softc(dev);
5326         if_t ifp = adapter->ifp;
5327         u32             pmc, ctrl, ctrl_ext, rctl, wuc;
5328         u16             status;
5329
5330         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5331                 return;
5332
5333         /* Advertise the wakeup capability */
5334         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5335         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5336         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5337         wuc = E1000_READ_REG(&adapter->hw, E1000_WUC);
5338         wuc |= E1000_WUC_PME_EN;
5339         E1000_WRITE_REG(&adapter->hw, E1000_WUC, wuc);
5340
5341         if ((adapter->hw.mac.type == e1000_ich8lan) ||
5342             (adapter->hw.mac.type == e1000_pchlan) ||
5343             (adapter->hw.mac.type == e1000_ich9lan) ||
5344             (adapter->hw.mac.type == e1000_ich10lan))
5345                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5346
5347         /* Keep the laser running on Fiber adapters */
5348         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5349             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5350                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5351                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5352                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5353         }
5354
5355         /*
5356         ** Determine type of Wakeup: note that wol
5357         ** is set with all bits on by default.
5358         */
5359         if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5360                 adapter->wol &= ~E1000_WUFC_MAG;
5361
5362         if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5363                 adapter->wol &= ~E1000_WUFC_MC;
5364         else {
5365                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5366                 rctl |= E1000_RCTL_MPE;
5367                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5368         }
5369
5370         if ((adapter->hw.mac.type == e1000_pchlan)  ||
5371             (adapter->hw.mac.type == e1000_pch2lan) ||
5372             (adapter->hw.mac.type == e1000_pch_lpt) ||
5373             (adapter->hw.mac.type == e1000_pch_spt)) {
5374                 if (em_enable_phy_wakeup(adapter))
5375                         return;
5376         } else {
5377                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5378                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5379         }
5380
5381         if (adapter->hw.phy.type == e1000_phy_igp_3)
5382                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5383
5384         /* Request PME */
5385         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5386         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5387         if (if_getcapenable(ifp) & IFCAP_WOL)
5388                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5389         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5390
5391         return;
5392 }
5393
5394 /*
5395 ** WOL in the newer chipset interfaces (pchlan)
5396 ** requires settings to be copied into the PHY
5397 */
5398 static int
5399 em_enable_phy_wakeup(struct adapter *adapter)
5400 {
5401         struct e1000_hw *hw = &adapter->hw;
5402         u32 mreg, ret = 0;
5403         u16 preg;
5404
5405         /* copy MAC RARs to PHY RARs */
5406         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5407
5408         /* copy MAC MTA to PHY MTA */
5409         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5410                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5411                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5412                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5413                     (u16)((mreg >> 16) & 0xFFFF));
5414         }
5415
5416         /* configure PHY Rx Control register */
5417         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5418         mreg = E1000_READ_REG(hw, E1000_RCTL);
5419         if (mreg & E1000_RCTL_UPE)
5420                 preg |= BM_RCTL_UPE;
5421         if (mreg & E1000_RCTL_MPE)
5422                 preg |= BM_RCTL_MPE;
5423         preg &= ~(BM_RCTL_MO_MASK);
5424         if (mreg & E1000_RCTL_MO_3)
5425                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5426                                 << BM_RCTL_MO_SHIFT);
5427         if (mreg & E1000_RCTL_BAM)
5428                 preg |= BM_RCTL_BAM;
5429         if (mreg & E1000_RCTL_PMCF)
5430                 preg |= BM_RCTL_PMCF;
5431         mreg = E1000_READ_REG(hw, E1000_CTRL);
5432         if (mreg & E1000_CTRL_RFCE)
5433                 preg |= BM_RCTL_RFCE;
5434         e1000_write_phy_reg(hw, BM_RCTL, preg);
5435
5436         /* enable PHY wakeup in MAC register */
5437         E1000_WRITE_REG(hw, E1000_WUC,
5438             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5439         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5440
5441         /* configure and enable PHY wakeup in PHY registers */
5442         e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
5443         e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);
5444
5445         /* activate PHY wakeup */
5446         ret = hw->phy.ops.acquire(hw);
5447         if (ret) {
5448                 printf("Could not acquire PHY\n");
5449                 return ret;
5450         }
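             /*
             ** The wakeup control registers live on a separate PHY
             ** page; the enable register on page 769 gates access to
             ** them, so select that page and set the host wakeup bits
             ** first.
             */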
5451         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5452                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5453         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5454         if (ret) {
5455                 printf("Could not read PHY page 769\n");
5456                 goto out;
5457         }
5458         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5459         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5460         if (ret)
5461                 printf("Could not set PHY Host Wakeup bit\n");
5462 out:
5463         hw->phy.ops.release(hw);
5464
5465         return ret;
5466 }
5467
5468 static void
5469 em_led_func(void *arg, int onoff)
5470 {
5471         struct adapter  *adapter = arg;
5472  
5473         EM_CORE_LOCK(adapter);
5474         if (onoff) {
5475                 e1000_setup_led(&adapter->hw);
5476                 e1000_led_on(&adapter->hw);
5477         } else {
5478                 e1000_led_off(&adapter->hw);
5479                 e1000_cleanup_led(&adapter->hw);
5480         }
5481         EM_CORE_UNLOCK(adapter);
5482 }
5483
5484 /*
5485 ** Disable the L0s and L1 ASPM link states
5486 */
5487 static void
5488 em_disable_aspm(struct adapter *adapter)
5489 {
5490         int             base, reg;
5491         u16             link_cap, link_ctrl;
5492         device_t        dev = adapter->dev;
5493
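             /*
             ** Only the parts whitelisted below get the ASPM
             ** workaround; everything else is left untouched.
             */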
5494         switch (adapter->hw.mac.type) {
5495                 case e1000_82573:
5496                 case e1000_82574:
5497                 case e1000_82583:
5498                         break;
5499                 default:
5500                         return;
5501         }
5502         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5503                 return;
5504         reg = base + PCIER_LINK_CAP;
5505         link_cap = pci_read_config(dev, reg, 2);
5506         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5507                 return;
5508         reg = base + PCIER_LINK_CTL;
5509         link_ctrl = pci_read_config(dev, reg, 2);
5510         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5511         pci_write_config(dev, reg, link_ctrl, 2);
5512         return;
5513 }
5514
5515 /**********************************************************************
5516  *
5517  *  Update the board statistics counters.
5518  *
5519  **********************************************************************/
5520 static void
5521 em_update_stats_counters(struct adapter *adapter)
5522 {
5523
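             /*
             ** Symbol and sequence errors are only sampled on copper
             ** links, or on fiber/serdes when the link is up.
             */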
5524         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5525            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5526                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5527                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5528         }
5529         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5530         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5531         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5532         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5533
5534         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5535         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5536         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5537         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5538         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5539         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5540         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5541         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5542         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5543         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5544         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5545         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5546         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5547         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5548         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5549         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5550         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5551         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5552         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5553         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5554
5555         /* For the 64-bit byte counters the low dword must be read first. */
5556         /* Both registers clear on the read of the high dword */
5557
5558         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5559             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5560         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5561             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5562
5563         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5564         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5565         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5566         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5567         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5568
5569         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5570         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5571
5572         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5573         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5574         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5575         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5576         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5577         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5578         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5579         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5580         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5581         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5582
5583         /* Interrupt Counts */
5584
5585         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5586         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5587         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5588         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5589         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5590         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5591         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5592         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5593         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5594
5595         if (adapter->hw.mac.type >= e1000_82543) {
5596                 adapter->stats.algnerrc +=
5597                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5598                 adapter->stats.rxerrc +=
5599                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5600                 adapter->stats.tncrs +=
5601                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5602                 adapter->stats.cexterr +=
5603                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5604                 adapter->stats.tsctc +=
5605                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5606                 adapter->stats.tsctfc +=
5607                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5608         }
5609 }
5610
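     /*
     ** Map the accumulated hardware statistics onto the generic
     ** if_get_counter() categories; anything unhandled falls back
     ** to the stack's default counters.
     */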
5611 static uint64_t
5612 em_get_counter(if_t ifp, ift_counter cnt)
5613 {
5614         struct adapter *adapter;
5615
5616         adapter = if_getsoftc(ifp);
5617
5618         switch (cnt) {
5619         case IFCOUNTER_COLLISIONS:
5620                 return (adapter->stats.colc);
5621         case IFCOUNTER_IERRORS:
5622                 return (adapter->dropped_pkts + adapter->stats.rxerrc +
5623                     adapter->stats.crcerrs + adapter->stats.algnerrc +
5624                     adapter->stats.ruc + adapter->stats.roc +
5625                     adapter->stats.mpc + adapter->stats.cexterr);
5626         case IFCOUNTER_OERRORS:
5627                 return (adapter->stats.ecol + adapter->stats.latecol +
5628                     adapter->watchdog_events);
5629         default:
5630                 return (if_get_counter_default(ifp, cnt));
5631         }
5632 }
5633
5634 /* Export a single 32-bit register via a read-only sysctl. */
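      /* (arg1 is the adapter softc, arg2 the register offset to read.) */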
5635 static int
5636 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5637 {
5638         struct adapter *adapter;
5639         u_int val;
5640
5641         adapter = oidp->oid_arg1;
5642         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5643         return (sysctl_handle_int(oidp, &val, 0, req));
5644 }
5645
5646 /*
5647  * Add sysctl variables, one per statistic, to the system.
5648  */
5649 static void
5650 em_add_hw_stats(struct adapter *adapter)
5651 {
5652         device_t dev = adapter->dev;
5653
5654         struct tx_ring *txr = adapter->tx_rings;
5655         struct rx_ring *rxr = adapter->rx_rings;
5656
5657         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5658         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5659         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5660         struct e1000_hw_stats *stats = &adapter->stats;
5661
5662         struct sysctl_oid *stat_node, *queue_node, *int_node;
5663         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5664
5665 #define QUEUE_NAME_LEN 32
5666         char namebuf[QUEUE_NAME_LEN];
5667         
5668         /* Driver Statistics */
5669         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5670                         CTLFLAG_RD, &adapter->dropped_pkts,
5671                         "Driver dropped packets");
5672         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5673                         CTLFLAG_RD, &adapter->link_irq,
5674                         "Link MSIX IRQ Handled");
5675         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
5676                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5677                          "Defragmenting mbuf chain failed");
5678         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5679                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5680                         "Driver tx dma failure in xmit");
5681         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5682                         CTLFLAG_RD, &adapter->rx_overruns,
5683                         "RX overruns");
5684         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5685                         CTLFLAG_RD, &adapter->watchdog_events,
5686                         "Watchdog timeouts");
5687         
5688         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5689                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5690                         em_sysctl_reg_handler, "IU",
5691                         "Device Control Register");
5692         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5693                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5694                         em_sysctl_reg_handler, "IU",
5695                         "Receiver Control Register");
5696         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5697                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5698                         "Flow Control High Watermark");
5699         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5700                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5701                         "Flow Control Low Watermark");
5702
5703         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5704                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5705                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5706                                             CTLFLAG_RD, NULL, "TX Queue Name");
5707                 queue_list = SYSCTL_CHILDREN(queue_node);
5708
5709                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5710                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5711                                 E1000_TDH(txr->me),
5712                                 em_sysctl_reg_handler, "IU",
5713                                 "Transmit Descriptor Head");
5714                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5715                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5716                                 E1000_TDT(txr->me),
5717                                 em_sysctl_reg_handler, "IU",
5718                                 "Transmit Descriptor Tail");
5719                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5720                                 CTLFLAG_RD, &txr->tx_irq,
5721                                 "Queue MSI-X Transmit Interrupts");
5722                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5723                                 CTLFLAG_RD, &txr->no_desc_avail,
5724                                 "Queue No Descriptor Available");
5725
5726                 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5727                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5728                                             CTLFLAG_RD, NULL, "RX Queue Name");
5729                 queue_list = SYSCTL_CHILDREN(queue_node);
5730
5731                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5732                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5733                                 E1000_RDH(rxr->me),
5734                                 em_sysctl_reg_handler, "IU",
5735                                 "Receive Descriptor Head");
5736                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5737                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5738                                 E1000_RDT(rxr->me),
5739                                 em_sysctl_reg_handler, "IU",
5740                                 "Receive Descriptor Tail");
5741                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5742                                 CTLFLAG_RD, &rxr->rx_irq,
5743                                 "Queue MSI-X Receive Interrupts");
5744         }
5745
5746         /* MAC stats get their own sub node */
5747
5748         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5749                                     CTLFLAG_RD, NULL, "Statistics");
5750         stat_list = SYSCTL_CHILDREN(stat_node);
5751
5752         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5753                         CTLFLAG_RD, &stats->ecol,
5754                         "Excessive collisions");
5755         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5756                         CTLFLAG_RD, &stats->scc,
5757                         "Single collisions");
5758         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5759                         CTLFLAG_RD, &stats->mcc,
5760                         "Multiple collisions");
5761         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5762                         CTLFLAG_RD, &stats->latecol,
5763                         "Late collisions");
5764         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5765                         CTLFLAG_RD, &stats->colc,
5766                         "Collision Count");
5767         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5768                         CTLFLAG_RD, &adapter->stats.symerrs,
5769                         "Symbol Errors");
5770         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5771                         CTLFLAG_RD, &adapter->stats.sec,
5772                         "Sequence Errors");
5773         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5774                         CTLFLAG_RD, &adapter->stats.dc,
5775                         "Defer Count");
5776         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5777                         CTLFLAG_RD, &adapter->stats.mpc,
5778                         "Missed Packets");
5779         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5780                         CTLFLAG_RD, &adapter->stats.rnbc,
5781                         "Receive No Buffers");
5782         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5783                         CTLFLAG_RD, &adapter->stats.ruc,
5784                         "Receive Undersize");
5785         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5786                         CTLFLAG_RD, &adapter->stats.rfc,
5787                         "Fragmented Packets Received");
5788         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5789                         CTLFLAG_RD, &adapter->stats.roc,
5790                         "Oversized Packets Received");
5791         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5792                         CTLFLAG_RD, &adapter->stats.rjc,
5793                         "Received Jabber");
5794         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5795                         CTLFLAG_RD, &adapter->stats.rxerrc,
5796                         "Receive Errors");
5797         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5798                         CTLFLAG_RD, &adapter->stats.crcerrs,
5799                         "CRC errors");
5800         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5801                         CTLFLAG_RD, &adapter->stats.algnerrc,
5802                         "Alignment Errors");
5803         /* On 82575 these are collision counts */
5804         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5805                         CTLFLAG_RD, &adapter->stats.cexterr,
5806                         "Collision/Carrier extension errors");
5807         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5808                         CTLFLAG_RD, &adapter->stats.xonrxc,
5809                         "XON Received");
5810         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5811                         CTLFLAG_RD, &adapter->stats.xontxc,
5812                         "XON Transmitted");
5813         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5814                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5815                         "XOFF Received");
5816         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5817                         CTLFLAG_RD, &adapter->stats.xofftxc,
5818                         "XOFF Transmitted");
5819
5820         /* Packet Reception Stats */
5821         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5822                         CTLFLAG_RD, &adapter->stats.tpr,
5823                         "Total Packets Received");
5824         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5825                         CTLFLAG_RD, &adapter->stats.gprc,
5826                         "Good Packets Received");
5827         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5828                         CTLFLAG_RD, &adapter->stats.bprc,
5829                         "Broadcast Packets Received");
5830         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5831                         CTLFLAG_RD, &adapter->stats.mprc,
5832                         "Multicast Packets Received");
5833         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5834                         CTLFLAG_RD, &adapter->stats.prc64,
5835                         "64 byte frames received");
5836         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5837                         CTLFLAG_RD, &adapter->stats.prc127,
5838                         "65-127 byte frames received");
5839         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5840                         CTLFLAG_RD, &adapter->stats.prc255,
5841                         "128-255 byte frames received");
5842         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5843                         CTLFLAG_RD, &adapter->stats.prc511,
5844                         "256-511 byte frames received");
5845         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5846                         CTLFLAG_RD, &adapter->stats.prc1023,
5847                         "512-1023 byte frames received");
5848         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5849                         CTLFLAG_RD, &adapter->stats.prc1522,
5850                         "1024-1522 byte frames received");
5851         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5852                         CTLFLAG_RD, &adapter->stats.gorc, 
5853                         "Good Octets Received"); 
5854
5855         /* Packet Transmission Stats */
5856         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5857                         CTLFLAG_RD, &adapter->stats.gotc, 
5858                         "Good Octets Transmitted"); 
5859         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5860                         CTLFLAG_RD, &adapter->stats.tpt,
5861                         "Total Packets Transmitted");
5862         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5863                         CTLFLAG_RD, &adapter->stats.gptc,
5864                         "Good Packets Transmitted");
5865         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5866                         CTLFLAG_RD, &adapter->stats.bptc,
5867                         "Broadcast Packets Transmitted");
5868         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5869                         CTLFLAG_RD, &adapter->stats.mptc,
5870                         "Multicast Packets Transmitted");
5871         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5872                         CTLFLAG_RD, &adapter->stats.ptc64,
5873                         "64 byte frames transmitted");
5874         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5875                         CTLFLAG_RD, &adapter->stats.ptc127,
5876                         "65-127 byte frames transmitted");
5877         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5878                         CTLFLAG_RD, &adapter->stats.ptc255,
5879                         "128-255 byte frames transmitted");
5880         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5881                         CTLFLAG_RD, &adapter->stats.ptc511,
5882                         "256-511 byte frames transmitted");
5883         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5884                         CTLFLAG_RD, &adapter->stats.ptc1023,
5885                         "512-1023 byte frames transmitted");
5886         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5887                         CTLFLAG_RD, &adapter->stats.ptc1522,
5888                         "1024-1522 byte frames transmitted");
5889         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5890                         CTLFLAG_RD, &adapter->stats.tsctc,
5891                         "TSO Contexts Transmitted");
5892         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5893                         CTLFLAG_RD, &adapter->stats.tsctfc,
5894                         "TSO Contexts Failed");
5895
5896
5897         /* Interrupt Stats */
5898
5899         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5900                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5901         int_list = SYSCTL_CHILDREN(int_node);
5902
5903         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5904                         CTLFLAG_RD, &adapter->stats.iac,
5905                         "Interrupt Assertion Count");
5906
5907         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5908                         CTLFLAG_RD, &adapter->stats.icrxptc,
5909                         "Interrupt Cause Rx Pkt Timer Expire Count");
5910
5911         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5912                         CTLFLAG_RD, &adapter->stats.icrxatc,
5913                         "Interrupt Cause Rx Abs Timer Expire Count");
5914
5915         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5916                         CTLFLAG_RD, &adapter->stats.ictxptc,
5917                         "Interrupt Cause Tx Pkt Timer Expire Count");
5918
5919         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5920                         CTLFLAG_RD, &adapter->stats.ictxatc,
5921                         "Interrupt Cause Tx Abs Timer Expire Count");
5922
5923         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5924                         CTLFLAG_RD, &adapter->stats.ictxqec,
5925                         "Interrupt Cause Tx Queue Empty Count");
5926
5927         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5928                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5929                         "Interrupt Cause Tx Queue Min Thresh Count");
5930
5931         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5932                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5933                         "Interrupt Cause Rx Desc Min Thresh Count");
5934
5935         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5936                         CTLFLAG_RD, &adapter->stats.icrxoc,
5937                         "Interrupt Cause Receiver Overrun Count");
5938 }
5939
5940 /**********************************************************************
5941  *
5942  *  This routine provides a way to dump out the adapter eeprom,
5943  *  often a useful debug/service tool. This only dumps the first
5944  *  32 words; the data that matters lives within that range.
5945  *
5946  **********************************************************************/
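     /*
     ** Triggered by writing 1 to the backing OID, e.g. (assuming the
     ** OID is named "nvm"):  sysctl dev.em.0.nvm=1
     */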
5947 static int
5948 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5949 {
5950         struct adapter *adapter = (struct adapter *)arg1;
5951         int error;
5952         int result;
5953
5954         result = -1;
5955         error = sysctl_handle_int(oidp, &result, 0, req);
5956
5957         if (error || !req->newptr)
5958                 return (error);
5959
5960         /*
5961          * This value will cause a hex dump of the
5962          * first 32 16-bit words of the EEPROM to
5963          * the screen.
5964          */
5965         if (result == 1)
5966                 em_print_nvm_info(adapter);
5967
5968         return (error);
5969 }
5970
5971 static void
5972 em_print_nvm_info(struct adapter *adapter)
5973 {
5974         u16     eeprom_data;
5975         int     i, j, row = 0;
5976
5977         /* It's a bit crude, but it gets the job done */
5978         printf("\nInterface EEPROM Dump:\n");
5979         printf("Offset\n0x0000  ");
5980         for (i = 0, j = 0; i < 32; i++, j++) {
5981                 if (j == 8) { /* Make the offset block */
5982                         j = 0; ++row;
5983                         printf("\n0x00%x0  ", row);
5984                 }
5985                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5986                 printf("%04x ", eeprom_data);
5987         }
5988         printf("\n");
5989 }
5990
5991 static int
5992 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5993 {
5994         struct em_int_delay_info *info;
5995         struct adapter *adapter;
5996         u32 regval;
5997         int error, usecs, ticks;
5998
5999         info = (struct em_int_delay_info *)arg1;
6000         usecs = info->value;
6001         error = sysctl_handle_int(oidp, &usecs, 0, req);
6002         if (error != 0 || req->newptr == NULL)
6003                 return (error);
6004         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6005                 return (EINVAL);
6006         info->value = usecs;
6007         ticks = EM_USECS_TO_TICKS(usecs);
6008         if (info->offset == E1000_ITR)  /* units are 256ns here */
6009                 ticks *= 4;
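             /*
             ** EM_USECS_TO_TICKS() yields 1.024us hardware ticks,
             ** while the ITR register counts 256ns units, hence the
             ** factor of four above.
             */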
6010
6011         adapter = info->adapter;
6012         
6013         EM_CORE_LOCK(adapter);
6014         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6015         regval = (regval & ~0xffff) | (ticks & 0xffff);
6016         /* Handle a few special cases. */
6017         switch (info->offset) {
6018         case E1000_RDTR:
6019                 break;
6020         case E1000_TIDV:
6021                 if (ticks == 0) {
6022                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6023                         /* Don't write 0 into the TIDV register. */
6024                         regval++;
6025                 } else
6026                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6027                 break;
6028         }
6029         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6030         EM_CORE_UNLOCK(adapter);
6031         return (0);
6032 }
6033
6034 static void
6035 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6036         const char *description, struct em_int_delay_info *info,
6037         int offset, int value)
6038 {
6039         info->adapter = adapter;
6040         info->offset = offset;
6041         info->value = value;
6042         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6043             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6044             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6045             info, 0, em_sysctl_int_delay, "I", description);
6046 }
6047
6048 static void
6049 em_set_sysctl_value(struct adapter *adapter, const char *name,
6050         const char *description, int *limit, int value)
6051 {
6052         *limit = value;
6053         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6054             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6055             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6056 }
6057
6058
6059 /*
6060 ** Set flow control using sysctl:
6061 ** Flow control values:
6062 **      0 - off
6063 **      1 - rx pause
6064 **      2 - tx pause
6065 **      3 - full
6066 */
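     /*
     ** Example (assuming the backing OID is named "fc"):
     **     sysctl dev.em.0.fc=3
     */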
6067 static int
6068 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6069 {
6070         int             error, input;
6071         struct adapter  *adapter = (struct adapter *) arg1;
6072
6073         input = adapter->fc; /* a static here would be shared by all units */
6074         error = sysctl_handle_int(oidp, &input, 0, req);
6075     
6076         if ((error) || (req->newptr == NULL))
6077                 return (error);
6078                 
6079         if (input == adapter->fc) /* no change? */
6080                 return (error);
6081
6082         switch (input) {
6083                 case e1000_fc_rx_pause:
6084                 case e1000_fc_tx_pause:
6085                 case e1000_fc_full:
6086                 case e1000_fc_none:
6087                         adapter->hw.fc.requested_mode = input;
6088                         adapter->fc = input;
6089                         break;
6090                 default:
6091                         /* Do nothing */
6092                         return (error);
6093         }
6094
6095         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6096         e1000_force_mac_fc(&adapter->hw);
6097         return (error);
6098 }
6099
6100 /*
6101 ** Manage Energy Efficient Ethernet;
6102 ** the control value is a disable flag:
6103 **     0 - EEE enabled, 1 - EEE disabled
6104 */
6105 static int
6106 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6107 {
6108         struct adapter *adapter = (struct adapter *) arg1;
6109         int             error, value;
6110
6111         value = adapter->hw.dev_spec.ich8lan.eee_disable;
6112         error = sysctl_handle_int(oidp, &value, 0, req);
6113         if (error || req->newptr == NULL)
6114                 return (error);
6115         EM_CORE_LOCK(adapter);
6116         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6117         em_init_locked(adapter);
6118         EM_CORE_UNLOCK(adapter);
6119         return (0);
6120 }
6121
6122 static int
6123 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6124 {
6125         struct adapter *adapter;
6126         int error;
6127         int result;
6128
6129         result = -1;
6130         error = sysctl_handle_int(oidp, &result, 0, req);
6131
6132         if (error || !req->newptr)
6133                 return (error);
6134
6135         if (result == 1) {
6136                 adapter = (struct adapter *)arg1;
6137                 em_print_debug_info(adapter);
6138         }
6139
6140         return (error);
6141 }
6142
6143 /*
6144 ** This routine is meant to be fluid; add whatever is
6145 ** needed for debugging a problem.  -jfv
6146 */
6147 static void
6148 em_print_debug_info(struct adapter *adapter)
6149 {
6150         device_t dev = adapter->dev;
6151         struct tx_ring *txr = adapter->tx_rings;
6152         struct rx_ring *rxr = adapter->rx_rings;
6153
6154         if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6155                 printf("Interface is RUNNING ");
6156         else
6157                 printf("Interface is NOT RUNNING ");
6158
6159         if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6160                 printf("and INACTIVE\n");
6161         else
6162                 printf("and ACTIVE\n");
6163
6164         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6165                 device_printf(dev, "TX Queue %d ------\n", i);
6166                 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6167                         E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6168                         E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6169                 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6170                 device_printf(dev, "TX descriptors avail = %d\n",
6171                         txr->tx_avail);
6172                 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6173                         txr->no_desc_avail);
6174                 device_printf(dev, "RX Queue %d ------\n", i);
6175                 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6176                         E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6177                         E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6178                 device_printf(dev, "RX discarded packets = %ld\n",
6179                         rxr->rx_discarded);
6180                 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6181                 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6182         }
6183 }
6184
6185 #ifdef EM_MULTIQUEUE
6186 /*
6187  * 82574 only:
6188  * Write a new value to the EEPROM increasing the number of MSIX
6189  * vectors from 3 to 5, for proper multiqueue support.
6190  */
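     /*
     ** Note: the MSIX table size is loaded from the NVM at power-up,
     ** so the change likely takes effect only after a full reset or
     ** power cycle.
     */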
6191 static void
6192 em_enable_vectors_82574(struct adapter *adapter)
6193 {
6194         struct e1000_hw *hw = &adapter->hw;
6195         device_t dev = adapter->dev;
6196         u16 edata;
6197
6198         e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6199         device_printf(dev, "Current cap: %#06x\n", edata);
6200         if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6201                 device_printf(dev, "Writing to eeprom: increasing "
6202                     "reported MSIX vectors from 3 to 5...\n");
6203                 edata &= ~(EM_NVM_MSIX_N_MASK);
6204                 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6205                 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6206                 e1000_update_nvm_checksum(hw);
6207                 device_printf(dev, "Writing to eeprom: done\n");
6208         }
6209 }
6210 #endif
6211
6212 #ifdef DDB
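     /*
     ** Debugger convenience commands, run from the db> prompt:
     **     em_reset_dev   - reinitialize every attached em(4) device
     **     em_dump_queue  - dump per-queue debug state for each device
     */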
6213 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6214 {
6215         devclass_t      dc;
6216         int max_em;
6217
6218         dc = devclass_find("em");
6219         max_em = devclass_get_maxunit(dc);
6220
6221         for (int index = 0; index < max_em; index++) {
6222                 device_t dev;
6223                 dev = devclass_get_device(dc, index);
6224                 if (dev != NULL && device_get_driver(dev) == &em_driver) {
6225                         struct adapter *adapter = device_get_softc(dev);
6226                         EM_CORE_LOCK(adapter);
6227                         em_init_locked(adapter);
6228                         EM_CORE_UNLOCK(adapter);
6229                 }
6230         }
6231 }
6232 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6233 {
6234         devclass_t      dc;
6235         int max_em;
6236
6237         dc = devclass_find("em");
6238         max_em = devclass_get_maxunit(dc);
6239
6240         for (int index = 0; index < max_em; index++) {
6241                 device_t dev;
6242                 dev = devclass_get_device(dc, index);
6243                 if (dev != NULL && device_get_driver(dev) == &em_driver)
6244                         em_print_debug_info(device_get_softc(dev));
6245         }
6247 }
6248 #endif