/******************************************************************************

  Copyright (c) 2001-2014, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.4.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM2,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V2,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_LM3,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_I218_V3,     PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
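/*
 * The hardware interrupt delay timers tick in units of 1.024 usec,
 * so the two macros above convert between timer ticks and
 * microseconds, rounding to the nearest value.
 */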
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
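/*
 * The ITR register is programmed in 256 ns increments, so DEFAULT_ITR
 * works out to 1000000000 / (8000 * 256) ~= 488, which throttles the
 * device to roughly MAX_INTS_PER_SEC interrupts per second.
 */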

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet (1 = disabled)");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to map the flash
        ** memory, and this must happen after the MAC is
        ** identified.
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.
         * The count must not exceed the hardware maximum, and the
         * ring size in bytes must be a multiple of EM_DBA_ALIGN.
         */
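        /*
         * For example: with the 16-byte legacy descriptors used here
         * and an EM_DBA_ALIGN of 128, the counts must be multiples
         * of 8 descriptors.
         */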
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is important
        ** for reading the NVM and MAC address.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /* Disable ULP support */
        e1000_disable_ulp_lpt_lp(hw, TRUE);
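        /* (ULP here is the ultra low power link state of I217/I218 parts.) */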

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
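 *  (defers to em_suspend, so manageability release and wake-on-LAN
 *  arming happen on a single path)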
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the
 *  request rather than attempt an immediate send; this buffering,
 *  rather than multiple TX queues alone, is the advantage of
 *  this path.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m != NULL) {
                err = drbr_enqueue(ifp, txr->br, m);
                if (err)
                        return (err);
        }

        /* Process the queue */
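        /*
         * drbr_peek() leaves the mbuf on the ring: on success we
         * drbr_advance() past it, and on failure we either advance
         * (em_xmit() freed the chain and set next to NULL) or
         * drbr_putback() the possibly-modified mbuf for a later retry.
         */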
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL)
                                drbr_advance(ifp, txr->br);
                        else
                                drbr_putback(ifp, txr->br, next);
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

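        /*
         * Reclaim completed descriptors if we are running low; if there
         * is still not enough room for a maximally scattered frame,
         * mark the interface as busy.
         */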
        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make
                 *  it NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
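                /*
                 * max_frame_size counts the entire frame, so subtract
                 * the Ethernet header and CRC to get the largest
                 * allowable MTU.
                 */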
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten when
         * the other port is reset, so we keep a duplicate in RAR[14]
         * for that eventuality; this ensures the interface continues
         * to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
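        /* (VET is the VLAN Ether Type register the hardware matches on.) */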

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
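        /*
         * Frames up to 2 KB use standard clusters, up to 4 KB use
         * page-sized jumbo clusters, and anything larger uses 9 KB
         * jumbo clusters.
         */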
        if (adapter->hw.mac.max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->hw.mac.max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;

        /* Prepare receive descriptors and buffers */
        if (em_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                em_stop(adapter);
                return;
        }
        em_initialize_receive_unit(adapter);

        /* Use real VLAN Filter support? */
        if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
                if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
                        /* Use real VLAN Filter support */
                        em_setup_vlan_hw_support(adapter);
                else {
                        u32 ctrl;
                        ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
                        ctrl |= E1000_CTRL_VME;
                        E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
        }

        /* Don't lose promiscuous settings */
        em_set_promisc(adapter);

        /* Set the interface as ACTIVE */
        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

        callout_reset(&adapter->timer, hz, em_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        /* MSI/X configuration for 82574 */
1390         if (adapter->hw.mac.type == e1000_82574) {
1391                 int tmp;
1392                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1393                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1394                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1395                 /* Set the IVAR - interrupt vector routing. */
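                /*
                 * A reading of em_allocate_msix() below (not of the
                 * 82574 datasheet): each 4-bit field of adapter->ivars
                 * holds an MSIX vector number ORed with 0x8 as a valid
                 * bit -- RX queue i at bits i*4, TX queue i at bits
                 * 8 + i*4, the link vector at bit 16, plus bit 31.
                 */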
1396                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1397         }
1398
1399 #ifdef DEVICE_POLLING
1400         /*
1401          * Only enable interrupts if we are not polling; make sure
1402          * they are off otherwise.
1403          */
1404         if (ifp->if_capenable & IFCAP_POLLING)
1405                 em_disable_intr(adapter);
1406         else
1407 #endif /* DEVICE_POLLING */
1408                 em_enable_intr(adapter);
1409
1410         /* AMT based hardware can now take control from firmware */
1411         if (adapter->has_manage && adapter->has_amt)
1412                 em_get_hw_control(adapter);
1413 }
1414
1415 static void
1416 em_init(void *arg)
1417 {
1418         struct adapter *adapter = arg;
1419
1420         EM_CORE_LOCK(adapter);
1421         em_init_locked(adapter);
1422         EM_CORE_UNLOCK(adapter);
1423 }
1424
1425
1426 #ifdef DEVICE_POLLING
1427 /*********************************************************************
1428  *
1429  *  Legacy polling routine: note this only works with a single queue
1430  *
1431  *********************************************************************/
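/*
 * A sketch of the contract this handler assumes: FreeBSD's
 * DEVICE_POLLING framework invokes it with a packet budget in
 * 'count', periodically passing POLL_AND_CHECK_STATUS to request
 * the more expensive status check, and the return value reports
 * the number of RX packets processed.
 */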
1432 static int
1433 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1434 {
1435         struct adapter *adapter = ifp->if_softc;
1436         struct tx_ring  *txr = adapter->tx_rings;
1437         struct rx_ring  *rxr = adapter->rx_rings;
1438         u32             reg_icr;
1439         int             rx_done;
1440
1441         EM_CORE_LOCK(adapter);
1442         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1443                 EM_CORE_UNLOCK(adapter);
1444                 return (0);
1445         }
1446
1447         if (cmd == POLL_AND_CHECK_STATUS) {
1448                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1449                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1450                         callout_stop(&adapter->timer);
1451                         adapter->hw.mac.get_link_status = 1;
1452                         em_update_link_status(adapter);
1453                         callout_reset(&adapter->timer, hz,
1454                             em_local_timer, adapter);
1455                 }
1456         }
1457         EM_CORE_UNLOCK(adapter);
1458
1459         em_rxeof(rxr, count, &rx_done);
1460
1461         EM_TX_LOCK(txr);
1462         em_txeof(txr);
1463 #ifdef EM_MULTIQUEUE
1464         if (!drbr_empty(ifp, txr->br))
1465                 em_mq_start_locked(ifp, txr, NULL);
1466 #else
1467         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1468                 em_start_locked(ifp, txr);
1469 #endif
1470         EM_TX_UNLOCK(txr);
1471
1472         return (rx_done);
1473 }
1474 #endif /* DEVICE_POLLING */
1475
1476
1477 /*********************************************************************
1478  *
1479  *  Fast Legacy/MSI Combined Interrupt Service routine  
1480  *
1481  *********************************************************************/
1482 static int
1483 em_irq_fast(void *arg)
1484 {
1485         struct adapter  *adapter = arg;
1486         struct ifnet    *ifp;
1487         u32             reg_icr;
1488
1489         ifp = adapter->ifp;
1490
1491         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1492
1493         /* Hot eject?  */
1494         if (reg_icr == 0xffffffff)
1495                 return FILTER_STRAY;
1496
1497         /* Definitely not our interrupt.  */
1498         if (reg_icr == 0x0)
1499                 return FILTER_STRAY;
1500
1501         /*
1502          * Starting with the 82571 chip, bit 31 should be used to
1503          * determine whether the interrupt belongs to us.
1504          */
1505         if (adapter->hw.mac.type >= e1000_82571 &&
1506             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1507                 return FILTER_STRAY;
1508
1509         em_disable_intr(adapter);
1510         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1511
1512         /* Link status change */
1513         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1514                 adapter->hw.mac.get_link_status = 1;
1515                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1516         }
1517
1518         if (reg_icr & E1000_ICR_RXO)
1519                 adapter->rx_overruns++;
1520         return FILTER_HANDLED;
1521 }
1522
1523 /* Combined RX/TX handler, used by Legacy and MSI */
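/*
** The split below is the usual fast-filter/taskqueue pattern:
** em_irq_fast() masks the device and enqueues this task; the task
** drains RX/TX, re-enqueues itself while em_rxeof() reports more
** work, and re-enables interrupts only once the rings are quiet.
*/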
1524 static void
1525 em_handle_que(void *context, int pending)
1526 {
1527         struct adapter  *adapter = context;
1528         struct ifnet    *ifp = adapter->ifp;
1529         struct tx_ring  *txr = adapter->tx_rings;
1530         struct rx_ring  *rxr = adapter->rx_rings;
1531
1532
1533         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1534                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1535                 EM_TX_LOCK(txr);
1536                 em_txeof(txr);
1537 #ifdef EM_MULTIQUEUE
1538                 if (!drbr_empty(ifp, txr->br))
1539                         em_mq_start_locked(ifp, txr, NULL);
1540 #else
1541                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1542                         em_start_locked(ifp, txr);
1543 #endif
1544                 EM_TX_UNLOCK(txr);
1545                 if (more) {
1546                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1547                         return;
1548                 }
1549         }
1550
1551         em_enable_intr(adapter);
1552         return;
1553 }
1554
1555
1556 /*********************************************************************
1557  *
1558  *  MSIX Interrupt Service Routines
1559  *
1560  **********************************************************************/
1561 static void
1562 em_msix_tx(void *arg)
1563 {
1564         struct tx_ring *txr = arg;
1565         struct adapter *adapter = txr->adapter;
1566         struct ifnet    *ifp = adapter->ifp;
1567
1568         ++txr->tx_irq;
1569         EM_TX_LOCK(txr);
1570         em_txeof(txr);
1571 #ifdef EM_MULTIQUEUE
1572         if (!drbr_empty(ifp, txr->br))
1573                 em_mq_start_locked(ifp, txr, NULL);
1574 #else
1575         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1576                 em_start_locked(ifp, txr);
1577 #endif
1578         /* Reenable this interrupt */
1579         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1580         EM_TX_UNLOCK(txr);
1581         return;
1582 }
1583
1584 /*********************************************************************
1585  *
1586  *  MSIX RX Interrupt Service routine
1587  *
1588  **********************************************************************/
1589
1590 static void
1591 em_msix_rx(void *arg)
1592 {
1593         struct rx_ring  *rxr = arg;
1594         struct adapter  *adapter = rxr->adapter;
1595         bool            more;
1596
1597         ++rxr->rx_irq;
1598         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1599                 return;
1600         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1601         if (more)
1602                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1603         else
1604                 /* Reenable this interrupt */
1605                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1606         return;
1607 }
1608
1609 /*********************************************************************
1610  *
1611  *  MSIX Link Fast Interrupt Service routine
1612  *
1613  **********************************************************************/
1614 static void
1615 em_msix_link(void *arg)
1616 {
1617         struct adapter  *adapter = arg;
1618         u32             reg_icr;
1619
1620         ++adapter->link_irq;
1621         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1622
1623         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1624                 adapter->hw.mac.get_link_status = 1;
1625                 em_handle_link(adapter, 0);
1626         } else
1627                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1628                     EM_MSIX_LINK | E1000_IMS_LSC);
1629         return;
1630 }
1631
1632 static void
1633 em_handle_rx(void *context, int pending)
1634 {
1635         struct rx_ring  *rxr = context;
1636         struct adapter  *adapter = rxr->adapter;
1637         bool            more;
1638
1639         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1640         if (more)
1641                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1642         else
1643                 /* Reenable this interrupt */
1644                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1645 }
1646
1647 static void
1648 em_handle_tx(void *context, int pending)
1649 {
1650         struct tx_ring  *txr = context;
1651         struct adapter  *adapter = txr->adapter;
1652         struct ifnet    *ifp = adapter->ifp;
1653
1654         EM_TX_LOCK(txr);
1655         em_txeof(txr);
1656 #ifdef EM_MULTIQUEUE
1657         if (!drbr_empty(ifp, txr->br))
1658                 em_mq_start_locked(ifp, txr, NULL);
1659 #else
1660         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1661                 em_start_locked(ifp, txr);
1662 #endif
1663         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1664         EM_TX_UNLOCK(txr);
1665 }
1666
1667 static void
1668 em_handle_link(void *context, int pending)
1669 {
1670         struct adapter  *adapter = context;
1671         struct tx_ring  *txr = adapter->tx_rings;
1672         struct ifnet *ifp = adapter->ifp;
1673
1674         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1675                 return;
1676
1677         EM_CORE_LOCK(adapter);
1678         callout_stop(&adapter->timer);
1679         em_update_link_status(adapter);
1680         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1681         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1682             EM_MSIX_LINK | E1000_IMS_LSC);
1683         if (adapter->link_active) {
1684                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1685                         EM_TX_LOCK(txr);
1686 #ifdef EM_MULTIQUEUE
1687                         if (!drbr_empty(ifp, txr->br))
1688                                 em_mq_start_locked(ifp, txr, NULL);
1689 #else
1690                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1691                                 em_start_locked(ifp, txr);
1692 #endif
1693                         EM_TX_UNLOCK(txr);
1694                 }
1695         }
1696         EM_CORE_UNLOCK(adapter);
1697 }
1698
1699
1700 /*********************************************************************
1701  *
1702  *  Media Ioctl callback
1703  *
1704  *  This routine is called whenever the user queries the status of
1705  *  the interface using ifconfig.
1706  *
1707  **********************************************************************/
1708 static void
1709 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1710 {
1711         struct adapter *adapter = ifp->if_softc;
1712         u_char fiber_type = IFM_1000_SX;
1713
1714         INIT_DEBUGOUT("em_media_status: begin");
1715
1716         EM_CORE_LOCK(adapter);
1717         em_update_link_status(adapter);
1718
1719         ifmr->ifm_status = IFM_AVALID;
1720         ifmr->ifm_active = IFM_ETHER;
1721
1722         if (!adapter->link_active) {
1723                 EM_CORE_UNLOCK(adapter);
1724                 return;
1725         }
1726
1727         ifmr->ifm_status |= IFM_ACTIVE;
1728
1729         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1730             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1731                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1732         } else {
1733                 switch (adapter->link_speed) {
1734                 case 10:
1735                         ifmr->ifm_active |= IFM_10_T;
1736                         break;
1737                 case 100:
1738                         ifmr->ifm_active |= IFM_100_TX;
1739                         break;
1740                 case 1000:
1741                         ifmr->ifm_active |= IFM_1000_T;
1742                         break;
1743                 }
1744                 if (adapter->link_duplex == FULL_DUPLEX)
1745                         ifmr->ifm_active |= IFM_FDX;
1746                 else
1747                         ifmr->ifm_active |= IFM_HDX;
1748         }
1749         EM_CORE_UNLOCK(adapter);
1750 }
1751
1752 /*********************************************************************
1753  *
1754  *  Media Ioctl callback
1755  *
1756  *  This routine is called when the user changes speed/duplex using
1757  *  media/mediaopt options with ifconfig.
1758  *
1759  **********************************************************************/
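/*
 * For example (hypothetical unit number), the paths below are
 * exercised from userland with:
 *
 *   ifconfig em0 media 100baseTX mediaopt full-duplex
 *   ifconfig em0 media autoselect
 */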
1760 static int
1761 em_media_change(struct ifnet *ifp)
1762 {
1763         struct adapter *adapter = ifp->if_softc;
1764         struct ifmedia  *ifm = &adapter->media;
1765
1766         INIT_DEBUGOUT("em_media_change: begin");
1767
1768         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1769                 return (EINVAL);
1770
1771         EM_CORE_LOCK(adapter);
1772         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1773         case IFM_AUTO:
1774                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1775                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1776                 break;
1777         case IFM_1000_LX:
1778         case IFM_1000_SX:
1779         case IFM_1000_T:
1780                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1781                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1782                 break;
1783         case IFM_100_TX:
1784                 adapter->hw.mac.autoneg = FALSE;
1785                 adapter->hw.phy.autoneg_advertised = 0;
1786                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1788                 else
1789                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1790                 break;
1791         case IFM_10_T:
1792                 adapter->hw.mac.autoneg = FALSE;
1793                 adapter->hw.phy.autoneg_advertised = 0;
1794                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1795                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1796                 else
1797                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1798                 break;
1799         default:
1800                 device_printf(adapter->dev, "Unsupported media type\n");
1801         }
1802
1803         em_init_locked(adapter);
1804         EM_CORE_UNLOCK(adapter);
1805
1806         return (0);
1807 }
1808
1809 /*********************************************************************
1810  *
1811  *  This routine maps the mbufs to tx descriptors.
1812  *
1813  *  return 0 on success, positive on failure
1814  **********************************************************************/
1815
1816 static int
1817 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1818 {
1819         struct adapter          *adapter = txr->adapter;
1820         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1821         bus_dmamap_t            map;
1822         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1823         struct e1000_tx_desc    *ctxd = NULL;
1824         struct mbuf             *m_head;
1825         struct ether_header     *eh;
1826         struct ip               *ip = NULL;
1827         struct tcphdr           *tp = NULL;
1828         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1829         int                     ip_off, poff;
1830         int                     nsegs, i, j, first, last = 0;
1831         int                     error, do_tso, tso_desc = 0, remap = 1;
1832
1833         m_head = *m_headp;
1834         txd_upper = txd_lower = txd_used = txd_saved = 0;
1835         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1836         ip_off = poff = 0;
1837
1838         /*
1839          * Intel recommends entire IP/TCP header length reside in a single
1840          * buffer. If multiple descriptors are used to describe the IP and
1841          * TCP header, each descriptor should describe one or more
1842          * complete headers; descriptors referencing only parts of headers
1843          * are not supported. If all layer headers are not coalesced into
1844          * a single buffer, each buffer should not cross a 4KB boundary,
1845          * or be larger than the maximum read request size.
1846          * The controller also requires the IP/TCP header to be modified
1847          * for TSO to work, so we first get a writable mbuf chain and
1848          * then coalesce the ethernet/IP/TCP headers into a single buffer
1849          * to meet the controller's requirements.  This also simplifies
1850          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1851          */
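        /*
         * A worked example of the offsets computed below, assuming an
         * untagged IPv4/TCP frame with no IP options:
         *   ip_off = sizeof(struct ether_header) = 14
         *   poff   = ip_off + (ip->ip_hl << 2)   = 14 + 20 = 34
         * An 802.1Q tag moves ip_off to 18 and poff to 38.
         */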
1852         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1853                 if (do_tso || (m_head->m_next != NULL && 
1854                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1855                         if (M_WRITABLE(*m_headp) == 0) {
1856                                 m_head = m_dup(*m_headp, M_NOWAIT);
1857                                 m_freem(*m_headp);
1858                                 if (m_head == NULL) {
1859                                         *m_headp = NULL;
1860                                         return (ENOBUFS);
1861                                 }
1862                                 *m_headp = m_head;
1863                         }
1864                 }
1865                 /*
1866                  * XXX
1867                  * Assume IPv4, we don't have TSO/checksum offload support
1868                  * for IPv6 yet.
1869                  */
1870                 ip_off = sizeof(struct ether_header);
1871                 m_head = m_pullup(m_head, ip_off);
1872                 if (m_head == NULL) {
1873                         *m_headp = NULL;
1874                         return (ENOBUFS);
1875                 }
1876                 eh = mtod(m_head, struct ether_header *);
1877                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1878                         ip_off = sizeof(struct ether_vlan_header);
1879                         m_head = m_pullup(m_head, ip_off);
1880                         if (m_head == NULL) {
1881                                 *m_headp = NULL;
1882                                 return (ENOBUFS);
1883                         }
1884                 }
1885                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1886                 if (m_head == NULL) {
1887                         *m_headp = NULL;
1888                         return (ENOBUFS);
1889                 }
1890                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1891                 poff = ip_off + (ip->ip_hl << 2);
1892                 if (do_tso) {
1893                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1894                         if (m_head == NULL) {
1895                                 *m_headp = NULL;
1896                                 return (ENOBUFS);
1897                         }
1898                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1899                         /*
1900                          * TSO workaround:
1901                          *   pull 4 more bytes of payload data into the head mbuf.
1902                          */
1903                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1904                         if (m_head == NULL) {
1905                                 *m_headp = NULL;
1906                                 return (ENOBUFS);
1907                         }
1908                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1909                         ip->ip_len = 0;
1910                         ip->ip_sum = 0;
1911                         /*
1912                          * The TCP pseudo checksum must not include the TCP
1913                          * payload length, so the driver recomputes it here
1914                          * as the hardware expects to see it, in adherence to
1915                          * Microsoft's Large Send specification.
1916                          */
1917                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1918                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1919                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
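                        /*
                         * in_pseudo() folds its three 32-bit arguments
                         * (here the source address, destination address,
                         * and htons(IPPROTO_TCP)) into a 16-bit partial
                         * checksum; the payload length is deliberately
                         * omitted, since the hardware accounts for the
                         * per-segment length when building each frame.
                         */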
1920                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1921                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1922                         if (m_head == NULL) {
1923                                 *m_headp = NULL;
1924                                 return (ENOBUFS);
1925                         }
1926                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1927                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1928                         if (m_head == NULL) {
1929                                 *m_headp = NULL;
1930                                 return (ENOBUFS);
1931                         }
1932                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1933                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1934                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1935                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1936                         if (m_head == NULL) {
1937                                 *m_headp = NULL;
1938                                 return (ENOBUFS);
1939                         }
1940                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1941                 }
1942                 *m_headp = m_head;
1943         }
1944
1945         /*
1946          * Map the packet for DMA
1947          *
1948          * Capture the first descriptor index,
1949          * this descriptor will have the index
1950          * of the EOP which is the only one that
1951          * now gets a DONE bit writeback.
1952          */
1953         first = txr->next_avail_desc;
1954         tx_buffer = &txr->tx_buffers[first];
1955         tx_buffer_mapped = tx_buffer;
1956         map = tx_buffer->map;
1957
1958 retry:
1959         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1960             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1961
1962         /*
1963          * There are two types of errors we can (try) to handle:
1964          * - EFBIG means the mbuf chain was too long and bus_dma ran
1965          *   out of segments.  Defragment the mbuf chain and try again.
1966          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1967          *   at this point in time.  Defer sending and try again later.
1968          * All other errors, in particular EINVAL, are fatal and prevent the
1969          * mbuf chain from ever going through.  Drop it and report error.
1970          */
1971         if (error == EFBIG && remap) {
1972                 struct mbuf *m;
1973
1974                 m = m_defrag(*m_headp, M_NOWAIT);
1975                 if (m == NULL) {
1976                         adapter->mbuf_alloc_failed++;
1977                         m_freem(*m_headp);
1978                         *m_headp = NULL;
1979                         return (ENOBUFS);
1980                 }
1981                 *m_headp = m;
1982
1983                 /* Try it again, but only once */
1984                 remap = 0;
1985                 goto retry;
1986         } else if (error == ENOMEM) {
1987                 adapter->no_tx_dma_setup++;
1988                 return (error);
1989         } else if (error != 0) {
1990                 adapter->no_tx_dma_setup++;
1991                 m_freem(*m_headp);
1992                 *m_headp = NULL;
1993                 return (error);
1994         }
1995
1996         /*
1997          * TSO Hardware workaround, if this packet is not
1998          * TSO, and is only a single descriptor long, and
1999          * it follows a TSO burst, then we need to add a
2000          * sentinel descriptor to prevent premature writeback.
2001          */
2002         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
2003                 if (nsegs == 1)
2004                         tso_desc = TRUE;
2005                 txr->tx_tso = FALSE;
2006         }
2007
2008         if (nsegs > (txr->tx_avail - 2)) {
2009                 txr->no_desc_avail++;
2010                 bus_dmamap_unload(txr->txtag, map);
2011                 return (ENOBUFS);
2012         }
2013         m_head = *m_headp;
2014
2015         /* Do hardware assists */
2016         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2017                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2018                     &txd_upper, &txd_lower);
2019                 /* we need to make a final sentinel transmit desc */
2020                 tso_desc = TRUE;
2021         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2022                 em_transmit_checksum_setup(txr, m_head,
2023                     ip_off, ip, &txd_upper, &txd_lower);
2024
2025         if (m_head->m_flags & M_VLANTAG) {
2026                 /* Set the vlan id. */
2027                 txd_upper |=
2028                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2029                 /* Tell hardware to add tag */
2030                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2031         }
2032
2033         i = txr->next_avail_desc;
2034
2035         /* Set up our transmit descriptors */
2036         for (j = 0; j < nsegs; j++) {
2037                 bus_size_t seg_len;
2038                 bus_addr_t seg_addr;
2039
2040                 tx_buffer = &txr->tx_buffers[i];
2041                 ctxd = &txr->tx_base[i];
2042                 seg_addr = segs[j].ds_addr;
2043                 seg_len  = segs[j].ds_len;
2044                 /*
2045                 ** TSO Workaround:
2046                 ** If this is the last descriptor, we want to
2047                 ** split it so we have a small final sentinel
2048                 */
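                /*
                ** Worked example of the split below: a final 1448-byte
                ** segment becomes a 1444-byte descriptor plus a 4-byte
                ** sentinel, so the DD writeback cannot complete before
                ** the last bytes are fetched.
                */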
2049                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2050                         seg_len -= 4;
2051                         ctxd->buffer_addr = htole64(seg_addr);
2052                         ctxd->lower.data = htole32(
2053                             adapter->txd_cmd | txd_lower | seg_len);
2054                         ctxd->upper.data =
2055                             htole32(txd_upper);
2056                         if (++i == adapter->num_tx_desc)
2057                                 i = 0;
2058                         /* Now make the sentinel */     
2059                         ++txd_used; /* using an extra txd */
2060                         ctxd = &txr->tx_base[i];
2061                         tx_buffer = &txr->tx_buffers[i];
2062                         ctxd->buffer_addr =
2063                             htole64(seg_addr + seg_len);
2064                         ctxd->lower.data = htole32(
2065                             adapter->txd_cmd | txd_lower | 4);
2066                         ctxd->upper.data =
2067                             htole32(txd_upper);
2068                         last = i;
2069                         if (++i == adapter->num_tx_desc)
2070                                 i = 0;
2071                 } else {
2072                         ctxd->buffer_addr = htole64(seg_addr);
2073                         ctxd->lower.data = htole32(
2074                             adapter->txd_cmd | txd_lower | seg_len);
2075                         ctxd->upper.data =
2076                             htole32(txd_upper);
2077                         last = i;
2078                         if (++i == adapter->num_tx_desc)
2079                                 i = 0;
2080                 }
2081                 tx_buffer->m_head = NULL;
2082                 tx_buffer->next_eop = -1;
2083         }
2084
2085         txr->next_avail_desc = i;
2086         txr->tx_avail -= nsegs;
2087         if (tso_desc) /* TSO used an extra for sentinel */
2088                 txr->tx_avail -= txd_used;
2089
2090         tx_buffer->m_head = m_head;
2091         /*
2092         ** Here we swap the map so the last descriptor,
2093         ** which gets the completion interrupt, has the
2094         ** real map, and the first descriptor gets the
2095         ** unused map from this descriptor.
2096         */
2097         tx_buffer_mapped->map = tx_buffer->map;
2098         tx_buffer->map = map;
2099         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2100
2101         /*
2102          * Last Descriptor of Packet
2103          * needs End Of Packet (EOP)
2104          * and Report Status (RS)
2105          */
2106         ctxd->lower.data |=
2107             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2108         /*
2109          * Keep track in the first buffer which
2110          * descriptor will be written back
2111          */
2112         tx_buffer = &txr->tx_buffers[first];
2113         tx_buffer->next_eop = last;
2114         /* Update the watchdog time early and often */
2115         txr->watchdog_time = ticks;
2116
2117         /*
2118          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2119          * that this frame is available to transmit.
2120          */
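        /*
         * Writing TDT = i hands descriptors [first, i) to the hardware;
         * since only the EOP descriptor carries the RS bit, a single
         * DONE status is written back for the whole frame.
         */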
2121         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2122             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2123         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2124
2125         return (0);
2126 }
2127
2128 static void
2129 em_set_promisc(struct adapter *adapter)
2130 {
2131         struct ifnet    *ifp = adapter->ifp;
2132         u32             reg_rctl;
2133
2134         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2135
2136         if (ifp->if_flags & IFF_PROMISC) {
2137                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2138                 /* Turn this on if you want to see bad packets */
2139                 if (em_debug_sbp)
2140                         reg_rctl |= E1000_RCTL_SBP;
2141                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2142         } else if (ifp->if_flags & IFF_ALLMULTI) {
2143                 reg_rctl |= E1000_RCTL_MPE;
2144                 reg_rctl &= ~E1000_RCTL_UPE;
2145                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2146         }
2147 }
2148
2149 static void
2150 em_disable_promisc(struct adapter *adapter)
2151 {
2152         struct ifnet    *ifp = adapter->ifp;
2153         u32             reg_rctl;
2154         int             mcnt = 0;
2155
2156         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2157         reg_rctl &=  (~E1000_RCTL_UPE);
2158         if (ifp->if_flags & IFF_ALLMULTI)
2159                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2160         else {
2161                 struct  ifmultiaddr *ifma;
2162 #if __FreeBSD_version < 800000
2163                 IF_ADDR_LOCK(ifp);
2164 #else   
2165                 if_maddr_rlock(ifp);
2166 #endif
2167                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2168                         if (ifma->ifma_addr->sa_family != AF_LINK)
2169                                 continue;
2170                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2171                                 break;
2172                         mcnt++;
2173                 }
2174 #if __FreeBSD_version < 800000
2175                 IF_ADDR_UNLOCK(ifp);
2176 #else
2177                 if_maddr_runlock(ifp);
2178 #endif
2179         }
2180         /* Don't clear MPE if we are at the multicast group limit */
2181         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2182                 reg_rctl &=  (~E1000_RCTL_MPE);
2183         reg_rctl &=  (~E1000_RCTL_SBP);
2184         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2185 }
2186
2187
2188 /*********************************************************************
2189  *  Multicast Update
2190  *
2191  *  This routine is called whenever the multicast address list is updated.
2192  *
2193  **********************************************************************/
2194
2195 static void
2196 em_set_multi(struct adapter *adapter)
2197 {
2198         struct ifnet    *ifp = adapter->ifp;
2199         struct ifmultiaddr *ifma;
2200         u32 reg_rctl = 0;
2201         u8  *mta; /* Multicast array memory */
2202         int mcnt = 0;
2203
2204         IOCTL_DEBUGOUT("em_set_multi: begin");
2205
2206         mta = adapter->mta;
2207         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2208
2209         if (adapter->hw.mac.type == e1000_82542 && 
2210             adapter->hw.revision_id == E1000_REVISION_2) {
2211                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2212                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2213                         e1000_pci_clear_mwi(&adapter->hw);
2214                 reg_rctl |= E1000_RCTL_RST;
2215                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2216                 msec_delay(5);
2217         }
2218
2219 #if __FreeBSD_version < 800000
2220         IF_ADDR_LOCK(ifp);
2221 #else
2222         if_maddr_rlock(ifp);
2223 #endif
2224         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2225                 if (ifma->ifma_addr->sa_family != AF_LINK)
2226                         continue;
2227
2228                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2229                         break;
2230
2231                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2232                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2233                 mcnt++;
2234         }
2235 #if __FreeBSD_version < 800000
2236         IF_ADDR_UNLOCK(ifp);
2237 #else
2238         if_maddr_runlock(ifp);
2239 #endif
2240         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2241                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2242                 reg_rctl |= E1000_RCTL_MPE;
2243                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2244         } else
2245                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2246
2247         if (adapter->hw.mac.type == e1000_82542 && 
2248             adapter->hw.revision_id == E1000_REVISION_2) {
2249                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2250                 reg_rctl &= ~E1000_RCTL_RST;
2251                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2252                 msec_delay(5);
2253                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2254                         e1000_pci_set_mwi(&adapter->hw);
2255         }
2256 }
2257
2258
2259 /*********************************************************************
2260  *  Timer routine
2261  *
2262  *  This routine checks the link status and updates statistics.
2263  *
2264  **********************************************************************/
2265
2266 static void
2267 em_local_timer(void *arg)
2268 {
2269         struct adapter  *adapter = arg;
2270         struct ifnet    *ifp = adapter->ifp;
2271         struct tx_ring  *txr = adapter->tx_rings;
2272         struct rx_ring  *rxr = adapter->rx_rings;
2273         u32             trigger;
2274
2275         EM_CORE_LOCK_ASSERT(adapter);
2276
2277         em_update_link_status(adapter);
2278         em_update_stats_counters(adapter);
2279
2280         /* Reset LAA into RAR[0] on 82571 */
2281         if ((adapter->hw.mac.type == e1000_82571) &&
2282             e1000_get_laa_state_82571(&adapter->hw))
2283                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2284
2285         /* Mask to use in the irq trigger */
2286         if (adapter->msix_mem)
2287                 trigger = rxr->ims;
2288         else
2289                 trigger = E1000_ICS_RXDMT0;
2290
2291         /*
2292         ** Check on the state of the TX queue(s); this
2293         ** can be done without the lock because it's RO
2294         ** and the HUNG state will be static if set.
2295         */
2296         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2297                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2298                     (adapter->pause_frames == 0))
2299                         goto hung;
2300                 /* Schedule a TX task if needed */
2301                 if (txr->tx_avail <= EM_MAX_SCATTER)
2302                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2303         }
2304         
2305         adapter->pause_frames = 0;
2306         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2307 #ifndef DEVICE_POLLING
2308         /* Trigger an RX interrupt to guarantee mbuf refresh */
2309         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2310 #endif
2311         return;
2312 hung:
2313         /* Looks like we're hung */
2314         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2315         device_printf(adapter->dev,
2316             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2317             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2318             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2319         device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2320             "Next TX to Clean = %d\n",
2321             txr->me, txr->tx_avail, txr->next_to_clean);
2322         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2323         adapter->watchdog_events++;
2324         adapter->pause_frames = 0;
2325         em_init_locked(adapter);
2326 }
2327
2328
2329 static void
2330 em_update_link_status(struct adapter *adapter)
2331 {
2332         struct e1000_hw *hw = &adapter->hw;
2333         struct ifnet *ifp = adapter->ifp;
2334         device_t dev = adapter->dev;
2335         struct tx_ring *txr = adapter->tx_rings;
2336         u32 link_check = 0;
2337
2338         /* Get the cached link value or read phy for real */
2339         switch (hw->phy.media_type) {
2340         case e1000_media_type_copper:
2341                 if (hw->mac.get_link_status) {
2342                         /* Do the work to read phy */
2343                         e1000_check_for_link(hw);
2344                         link_check = !hw->mac.get_link_status;
2345                         if (link_check) /* ESB2 fix */
2346                                 e1000_cfg_on_link_up(hw);
2347                 } else
2348                         link_check = TRUE;
2349                 break;
2350         case e1000_media_type_fiber:
2351                 e1000_check_for_link(hw);
2352                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2353                                  E1000_STATUS_LU);
2354                 break;
2355         case e1000_media_type_internal_serdes:
2356                 e1000_check_for_link(hw);
2357                 link_check = adapter->hw.mac.serdes_has_link;
2358                 break;
2359         default:
2360         case e1000_media_type_unknown:
2361                 break;
2362         }
2363
2364         /* Now check for a transition */
2365         if (link_check && (adapter->link_active == 0)) {
2366                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2367                     &adapter->link_duplex);
2368                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2369                 if ((adapter->link_speed != SPEED_1000) &&
2370                     ((hw->mac.type == e1000_82571) ||
2371                     (hw->mac.type == e1000_82572))) {
2372                         int tarc0;
2373                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2374                         tarc0 &= ~SPEED_MODE_BIT;
2375                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2376                 }
2377                 if (bootverbose)
2378                         device_printf(dev, "Link is up %d Mbps %s\n",
2379                             adapter->link_speed,
2380                             ((adapter->link_duplex == FULL_DUPLEX) ?
2381                             "Full Duplex" : "Half Duplex"));
2382                 adapter->link_active = 1;
2383                 adapter->smartspeed = 0;
2384                 ifp->if_baudrate = adapter->link_speed * 1000000;
2385                 if_link_state_change(ifp, LINK_STATE_UP);
2386         } else if (!link_check && (adapter->link_active == 1)) {
2387                 ifp->if_baudrate = adapter->link_speed = 0;
2388                 adapter->link_duplex = 0;
2389                 if (bootverbose)
2390                         device_printf(dev, "Link is Down\n");
2391                 adapter->link_active = 0;
2392                 /* Link down, disable watchdog */
2393                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2394                         txr->queue_status = EM_QUEUE_IDLE;
2395                 if_link_state_change(ifp, LINK_STATE_DOWN);
2396         }
2397 }
2398
2399 /*********************************************************************
2400  *
2401  *  This routine disables all traffic on the adapter by issuing a
2402  *  global reset on the MAC and deallocates TX/RX buffers.
2403  *
2404  *  This routine should always be called with BOTH the CORE
2405  *  and TX locks.
2406  **********************************************************************/
2407
2408 static void
2409 em_stop(void *arg)
2410 {
2411         struct adapter  *adapter = arg;
2412         struct ifnet    *ifp = adapter->ifp;
2413         struct tx_ring  *txr = adapter->tx_rings;
2414
2415         EM_CORE_LOCK_ASSERT(adapter);
2416
2417         INIT_DEBUGOUT("em_stop: begin");
2418
2419         em_disable_intr(adapter);
2420         callout_stop(&adapter->timer);
2421
2422         /* Tell the stack that the interface is no longer active */
2423         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2424         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2425
2426         /* Unarm watchdog timer. */
2427         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2428                 EM_TX_LOCK(txr);
2429                 txr->queue_status = EM_QUEUE_IDLE;
2430                 EM_TX_UNLOCK(txr);
2431         }
2432
2433         e1000_reset_hw(&adapter->hw);
2434         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2435
2436         e1000_led_off(&adapter->hw);
2437         e1000_cleanup_led(&adapter->hw);
2438 }
2439
2440
2441 /*********************************************************************
2442  *
2443  *  Determine hardware revision.
2444  *
2445  **********************************************************************/
2446 static void
2447 em_identify_hardware(struct adapter *adapter)
2448 {
2449         device_t dev = adapter->dev;
2450
2451         /* Make sure our PCI config space has the necessary stuff set */
2452         pci_enable_busmaster(dev);
2453         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2454
2455         /* Save off the information about this board */
2456         adapter->hw.vendor_id = pci_get_vendor(dev);
2457         adapter->hw.device_id = pci_get_device(dev);
2458         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2459         adapter->hw.subsystem_vendor_id =
2460             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2461         adapter->hw.subsystem_device_id =
2462             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2463
2464         /* Do Shared Code Init and Setup */
2465         if (e1000_set_mac_type(&adapter->hw)) {
2466                 device_printf(dev, "Setup init failure\n");
2467                 return;
2468         }
2469 }
2470
2471 static int
2472 em_allocate_pci_resources(struct adapter *adapter)
2473 {
2474         device_t        dev = adapter->dev;
2475         int             rid;
2476
2477         rid = PCIR_BAR(0);
2478         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2479             &rid, RF_ACTIVE);
2480         if (adapter->memory == NULL) {
2481                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2482                 return (ENXIO);
2483         }
2484         adapter->osdep.mem_bus_space_tag =
2485             rman_get_bustag(adapter->memory);
2486         adapter->osdep.mem_bus_space_handle =
2487             rman_get_bushandle(adapter->memory);
2488         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2489
2490         /* Default to a single queue */
2491         adapter->num_queues = 1;
2492
2493         /*
2494          * Setup MSI/X or MSI if PCI Express
2495          */
2496         adapter->msix = em_setup_msix(adapter);
2497
2498         adapter->hw.back = &adapter->osdep;
2499
2500         return (0);
2501 }
2502
2503 /*********************************************************************
2504  *
2505  *  Setup the Legacy or MSI Interrupt handler
2506  *
2507  **********************************************************************/
2508 int
2509 em_allocate_legacy(struct adapter *adapter)
2510 {
2511         device_t dev = adapter->dev;
2512         struct tx_ring  *txr = adapter->tx_rings;
2513         int error, rid = 0;
2514
2515         /* Manually turn off all interrupts */
2516         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2517
2518         if (adapter->msix == 1) /* using MSI */
2519                 rid = 1;
2520         /* We allocate a single interrupt resource */
2521         adapter->res = bus_alloc_resource_any(dev,
2522             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2523         if (adapter->res == NULL) {
2524                 device_printf(dev, "Unable to allocate bus resource: "
2525                     "interrupt\n");
2526                 return (ENXIO);
2527         }
2528
2529         /*
2530          * Allocate a fast interrupt and the associated
2531          * deferred processing contexts.
2532          */
2533         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2534         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2535             taskqueue_thread_enqueue, &adapter->tq);
2536         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2537             device_get_nameunit(adapter->dev));
2538         /* Use a TX only tasklet for local timer */
2539         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2540         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2541             taskqueue_thread_enqueue, &txr->tq);
2542         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2543             device_get_nameunit(adapter->dev));
2544         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2545         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2546             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2547                 device_printf(dev, "Failed to register fast interrupt "
2548                             "handler: %d\n", error);
2549                 taskqueue_free(adapter->tq);
2550                 adapter->tq = NULL;
2551                 return (error);
2552         }
2553         
2554         return (0);
2555 }
2556
2557 /*********************************************************************
2558  *
2559  *  Setup the MSIX Interrupt handlers
2560  *   This is not really Multiqueue, rather
2561  *   it's just separate interrupt vectors
2562  *   for TX, RX, and Link.
2563  *
2564  **********************************************************************/
2565 int
2566 em_allocate_msix(struct adapter *adapter)
2567 {
2568         device_t        dev = adapter->dev;
2569         struct          tx_ring *txr = adapter->tx_rings;
2570         struct          rx_ring *rxr = adapter->rx_rings;
2571         int             error, rid, vector = 0;
2572
2573
2574         /* Make sure all interrupts are disabled */
2575         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2576
2577         /* First set up ring resources */
2578         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2579
2580                 /* RX ring */
2581                 rid = vector + 1;
2582
2583                 rxr->res = bus_alloc_resource_any(dev,
2584                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2585                 if (rxr->res == NULL) {
2586                         device_printf(dev,
2587                             "Unable to allocate bus resource: "
2588                             "RX MSIX Interrupt %d\n", i);
2589                         return (ENXIO);
2590                 }
2591                 if ((error = bus_setup_intr(dev, rxr->res,
2592                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2593                     rxr, &rxr->tag)) != 0) {
2594                         device_printf(dev, "Failed to register RX handler");
2595                         return (error);
2596                 }
2597 #if __FreeBSD_version >= 800504
2598                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2599 #endif
2600                 rxr->msix = vector++; /* NOTE increment vector for TX */
2601                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2602                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2603                     taskqueue_thread_enqueue, &rxr->tq);
2604                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2605                     device_get_nameunit(adapter->dev));
2606                 /*
2607                 ** Set the bit to enable interrupt
2608                 ** in E1000_IMS -- bits 20 and 21
2609                 ** are for RX0 and RX1, note this has
2610                 ** NOTHING to do with the MSIX vector
2611                 */
2612                 rxr->ims = 1 << (20 + i);
2613                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2614
2615                 /* TX ring */
2616                 rid = vector + 1;
2617                 txr->res = bus_alloc_resource_any(dev,
2618                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2619                 if (txr->res == NULL) {
2620                         device_printf(dev,
2621                             "Unable to allocate bus resource: "
2622                             "TX MSIX Interrupt %d\n", i);
2623                         return (ENXIO);
2624                 }
2625                 if ((error = bus_setup_intr(dev, txr->res,
2626                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2627                     txr, &txr->tag)) != 0) {
2628                         device_printf(dev, "Failed to register TX handler");
2629                         return (error);
2630                 }
2631 #if __FreeBSD_version >= 800504
2632                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2633 #endif
2634                 txr->msix = vector++; /* Increment vector for next pass */
2635                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2636                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2637                     taskqueue_thread_enqueue, &txr->tq);
2638                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2639                     device_get_nameunit(adapter->dev));
2640                 /*
2641                 ** Set the bit to enable interrupt
2642                 ** in E1000_IMS -- bits 22 and 23
2643                 ** are for TX0 and TX1, note this has
2644                 ** NOTHING to do with the MSIX vector
2645                 */
2646                 txr->ims = 1 << (22 + i);
2647                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2648         }
2649
2650         /* Link interrupt */
2651         ++rid;
2652         adapter->res = bus_alloc_resource_any(dev,
2653             SYS_RES_IRQ, &rid, RF_ACTIVE);
2654         if (!adapter->res) {
2655                 device_printf(dev,"Unable to allocate "
2656                     "bus resource: Link interrupt [%d]\n", rid);
2657                 return (ENXIO);
2658         }
2659         /* Set the link handler function */
2660         error = bus_setup_intr(dev, adapter->res,
2661             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2662             em_msix_link, adapter, &adapter->tag);
2663         if (error) {
2664                 adapter->res = NULL;
2665                 device_printf(dev, "Failed to register LINK handler");
2666                 return (error);
2667         }
2668 #if __FreeBSD_version >= 800504
2669                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2670 #endif
2671         adapter->linkvec = vector;
2672         adapter->ivars |=  (8 | vector) << 16;
2673         adapter->ivars |= 0x80000000;
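        /*
        ** Worked example for the single-queue configuration this
        ** driver uses (RX vector 0, TX vector 1, link vector 2):
        **   ivars = (8|0) | (8|1) << 8 | (8|2) << 16 | 0x80000000
        **         = 0x800a0908
        ** with rxr->ims = 1 << 20 and txr->ims = 1 << 22 from above.
        */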
2674
2675         return (0);
2676 }
2677
2678
2679 static void
2680 em_free_pci_resources(struct adapter *adapter)
2681 {
2682         device_t        dev = adapter->dev;
2683         struct tx_ring  *txr;
2684         struct rx_ring  *rxr;
2685         int             rid;
2686
2687
2688         /*
2689         ** Release all the queue interrupt resources:
2690         */
2691         for (int i = 0; i < adapter->num_queues; i++) {
2692                 txr = &adapter->tx_rings[i];
2693                 rxr = &adapter->rx_rings[i];
2694                 /* an early abort? */
2695                 if ((txr == NULL) || (rxr == NULL))
2696                         break;
2697                 rid = txr->msix + 1;
2698                 if (txr->tag != NULL) {
2699                         bus_teardown_intr(dev, txr->res, txr->tag);
2700                         txr->tag = NULL;
2701                 }
2702                 if (txr->res != NULL)
2703                         bus_release_resource(dev, SYS_RES_IRQ,
2704                             rid, txr->res);
2705                 rid = rxr->msix + 1;
2706                 if (rxr->tag != NULL) {
2707                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2708                         rxr->tag = NULL;
2709                 }
2710                 if (rxr->res != NULL)
2711                         bus_release_resource(dev, SYS_RES_IRQ,
2712                             rid, rxr->res);
2713         }
2714
2715         if (adapter->linkvec) /* we are doing MSIX */
2716                 rid = adapter->linkvec + 1;
2717         else
2718                 rid = (adapter->msix != 0) ? 1 : 0;
2719
2720         if (adapter->tag != NULL) {
2721                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2722                 adapter->tag = NULL;
2723         }
2724
2725         if (adapter->res != NULL)
2726                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2727
2728
2729         if (adapter->msix)
2730                 pci_release_msi(dev);
2731
2732         if (adapter->msix_mem != NULL)
2733                 bus_release_resource(dev, SYS_RES_MEMORY,
2734                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2735
2736         if (adapter->memory != NULL)
2737                 bus_release_resource(dev, SYS_RES_MEMORY,
2738                     PCIR_BAR(0), adapter->memory);
2739
2740         if (adapter->flash != NULL)
2741                 bus_release_resource(dev, SYS_RES_MEMORY,
2742                     EM_FLASH, adapter->flash);
2743 }
2744
2745 /*
2746  * Setup MSI or MSI/X
2747  */
2748 static int
2749 em_setup_msix(struct adapter *adapter)
2750 {
2751         device_t dev = adapter->dev;
2752         int val;
2753
2754         /*
2755         ** Setup MSI/X for Hartwell: tests have shown
2756         ** use of two queues to be unstable, and to
2757         ** provide no great gain anyway, so we simply
2758         ** separate the interrupts and use a single queue.
2759         */
2760         if ((adapter->hw.mac.type == e1000_82574) &&
2761             (em_enable_msix == TRUE)) {
2762                 /* Map the MSIX BAR */
2763                 int rid = PCIR_BAR(EM_MSIX_BAR);
2764                 adapter->msix_mem = bus_alloc_resource_any(dev,
2765                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2766                 if (adapter->msix_mem == NULL) {
2767                         /* May not be enabled */
2768                         device_printf(adapter->dev,
2769                             "Unable to map MSIX table\n");
2770                         goto msi;
2771                 }
2772                 val = pci_msix_count(dev); 
2773                 /* We only need/want 3 vectors */
2774                 if (val >= 3)
2775                         val = 3;
2776                 else {
2777                         device_printf(adapter->dev,
2778                             "MSIX: insufficient vectors, using MSI\n");
2779                         goto msi;
2780                 }
2781
2782                 if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2783                         device_printf(adapter->dev,
2784                             "Using MSIX interrupts "
2785                             "with %d vectors\n", val);
2786                         return (val);
2787                 }
2788
2789                 /*
2790                 ** If MSIX alloc failed or provided us with
2791                 ** less than needed, free and fall through to MSI
2792                 */
2793                 pci_release_msi(dev);
2794         }
2795 msi:
2796         if (adapter->msix_mem != NULL) {
2797                 bus_release_resource(dev, SYS_RES_MEMORY,
2798                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2799                 adapter->msix_mem = NULL;
2800         }
2801         val = 1;
2802         if (pci_alloc_msi(dev, &val) == 0) {
2803                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2804                 return (val);
2805         } 
2806         /* Should only happen due to manual configuration */
2807         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2808         return (0);
2809 }
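/*
 * A sketch of how the return value is consumed (assumed from the attach
 * path, which is outside this excerpt): 3 selects MSI-X on the 82574
 * (separate TX, RX and link vectors), 1 selects a single MSI, and 0
 * falls back to the legacy INTx line.
 */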
2810
2811
2812 /*********************************************************************
2813  *
2814  *  Initialize the hardware to a configuration
2815  *  as specified by the adapter structure.
2816  *
2817  **********************************************************************/
2818 static void
2819 em_reset(struct adapter *adapter)
2820 {
2821         device_t        dev = adapter->dev;
2822         struct ifnet    *ifp = adapter->ifp;
2823         struct e1000_hw *hw = &adapter->hw;
2824         u16             rx_buffer_size;
2825         u32             pba;
2826
2827         INIT_DEBUGOUT("em_reset: begin");
2828
2829         /* Set up smart power down as default off on newer adapters. */
2830         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2831             hw->mac.type == e1000_82572)) {
2832                 u16 phy_tmp = 0;
2833
2834                 /* Speed up time to link by disabling smart power down. */
2835                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2836                 phy_tmp &= ~IGP02E1000_PM_SPD;
2837                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2838         }
2839
2840         /*
2841          * Packet Buffer Allocation (PBA)
2842          * Writing PBA sets the receive portion of the buffer;
2843          * the remainder is used for the transmit buffer.
2844          */
2845         switch (hw->mac.type) {
2846         /* Total Packet Buffer on these is 48K */
2847         case e1000_82571:
2848         case e1000_82572:
2849         case e1000_80003es2lan:
2850                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2851                 break;
2852         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2853                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2854                 break;
2855         case e1000_82574:
2856         case e1000_82583:
2857                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2858                 break;
2859         case e1000_ich8lan:
2860                 pba = E1000_PBA_8K;
2861                 break;
2862         case e1000_ich9lan:
2863         case e1000_ich10lan:
2864                 /* Boost Receive side for jumbo frames */
2865                 if (adapter->hw.mac.max_frame_size > 4096)
2866                         pba = E1000_PBA_14K;
2867                 else
2868                         pba = E1000_PBA_10K;
2869                 break;
2870         case e1000_pchlan:
2871         case e1000_pch2lan:
2872         case e1000_pch_lpt:
2873                 pba = E1000_PBA_26K;
2874                 break;
2875         default:
2876                 if (adapter->hw.mac.max_frame_size > 8192)
2877                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2878                 else
2879                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2880         }
2881         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2882
2883         /*
2884          * These parameters control the automatic generation (Tx) and
2885          * response (Rx) to Ethernet PAUSE frames.
2886          * - High water mark should allow for at least two frames to be
2887          *   received after sending an XOFF.
2888          * - Low water mark works best when it is very near the high water mark.
2889          *   This allows the receiver to restart by sending XON when it has
2890          *   drained a bit. Here we use an arbitrary value of 1500 which will
2891          *   restart after one full frame is pulled from the buffer. There
2892          *   could be several smaller frames in the buffer and if so they will
2893          *   not trigger the XON until their total number reduces the buffer
2894          *   by 1500.
2895          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2896          */
2897         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2898         hw->fc.high_water = rx_buffer_size -
2899             roundup2(adapter->hw.mac.max_frame_size, 1024);
2900         hw->fc.low_water = hw->fc.high_water - 1500;
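        /*
         * Worked example (hypothetical values, not from this source):
         * with a 48K RX allocation and a standard 1522-byte max frame,
         *
         *   rx_buffer_size = 48 * 1024                    = 49152
         *   high_water     = 49152 - roundup2(1522, 1024) = 47104
         *   low_water      = 47104 - 1500                 = 45604
         */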
2901
2902         if (adapter->fc) /* locally set flow control value? */
2903                 hw->fc.requested_mode = adapter->fc;
2904         else
2905                 hw->fc.requested_mode = e1000_fc_full;
2906
2907         if (hw->mac.type == e1000_80003es2lan)
2908                 hw->fc.pause_time = 0xFFFF;
2909         else
2910                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2911
2912         hw->fc.send_xon = TRUE;
2913
2914         /* Device specific overrides/settings */
2915         switch (hw->mac.type) {
2916         case e1000_pchlan:
2917                 /* Workaround: no TX flow ctrl for PCH */
2918                 hw->fc.requested_mode = e1000_fc_rx_pause;
2919                 hw->fc.pause_time = 0xFFFF; /* override */
2920                 if (ifp->if_mtu > ETHERMTU) {
2921                         hw->fc.high_water = 0x3500;
2922                         hw->fc.low_water = 0x1500;
2923                 } else {
2924                         hw->fc.high_water = 0x5000;
2925                         hw->fc.low_water = 0x3000;
2926                 }
2927                 hw->fc.refresh_time = 0x1000;
2928                 break;
2929         case e1000_pch2lan:
2930         case e1000_pch_lpt:
2931                 hw->fc.high_water = 0x5C20;
2932                 hw->fc.low_water = 0x5048;
2933                 hw->fc.pause_time = 0x0650;
2934                 hw->fc.refresh_time = 0x0400;
2935                 /* Jumbos need adjusted PBA */
2936                 if (ifp->if_mtu > ETHERMTU)
2937                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2938                 else
2939                         E1000_WRITE_REG(hw, E1000_PBA, 26);
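                /*
                 * The values written here are the RX share in KB (12K
                 * with jumbos, 26K otherwise); this assumes the same
                 * KB-granular PBA encoding implied by the
                 * ((PBA & 0xffff) << 10) conversion above.
                 */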
2940                 break;
2941         case e1000_ich9lan:
2942         case e1000_ich10lan:
2943                 if (ifp->if_mtu > ETHERMTU) {
2944                         hw->fc.high_water = 0x2800;
2945                         hw->fc.low_water = hw->fc.high_water - 8;
2946                         break;
2947                 } 
2948                 /* else fall thru */
2949         default:
2950                 if (hw->mac.type == e1000_80003es2lan)
2951                         hw->fc.pause_time = 0xFFFF;
2952                 break;
2953         }
2954
2955         /* Issue a global reset */
2956         e1000_reset_hw(hw);
2957         E1000_WRITE_REG(hw, E1000_WUC, 0);
2958         em_disable_aspm(adapter);
2959         /* and a re-init */
2960         if (e1000_init_hw(hw) < 0) {
2961                 device_printf(dev, "Hardware Initialization Failed\n");
2962                 return;
2963         }
2964
2965         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2966         e1000_get_phy_info(hw);
2967         e1000_check_for_link(hw);
2968         return;
2969 }
2970
2971 /*********************************************************************
2972  *
2973  *  Setup networking device structure and register an interface.
2974  *
2975  **********************************************************************/
2976 static int
2977 em_setup_interface(device_t dev, struct adapter *adapter)
2978 {
2979         struct ifnet   *ifp;
2980
2981         INIT_DEBUGOUT("em_setup_interface: begin");
2982
2983         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2984         if (ifp == NULL) {
2985                 device_printf(dev, "can not allocate ifnet structure\n");
2986                 return (-1);
2987         }
2988         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2989         ifp->if_init =  em_init;
2990         ifp->if_softc = adapter;
2991         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2992         ifp->if_ioctl = em_ioctl;
2993 #ifdef EM_MULTIQUEUE
2994         /* Multiqueue stack interface */
2995         ifp->if_transmit = em_mq_start;
2996         ifp->if_qflush = em_qflush;
2997 #else
2998         ifp->if_start = em_start;
2999         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3000         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3001         IFQ_SET_READY(&ifp->if_snd);
3002 #endif  
3003
3004         ether_ifattach(ifp, adapter->hw.mac.addr);
3005
3006         ifp->if_capabilities = ifp->if_capenable = 0;
3007
3008
3009         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3010         ifp->if_capabilities |= IFCAP_TSO4;
3011         /*
3012          * Tell the upper layer(s) we
3013          * support full VLAN capability
3014          */
3015         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3016         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3017                              |  IFCAP_VLAN_HWTSO
3018                              |  IFCAP_VLAN_MTU;
3019         ifp->if_capenable = ifp->if_capabilities;
3020
3021         /*
3022         ** Don't enable this by default: if vlans are
3023         ** created on another pseudo device (e.g. lagg),
3024         ** vlan events are not passed through, breaking
3025         ** operation, but with HW FILTER off it works. If
3026         ** using vlans directly on the em driver you can
3027         ** enable this and get full hardware tag filtering.
3028         */
3029         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3030
3031 #ifdef DEVICE_POLLING
3032         ifp->if_capabilities |= IFCAP_POLLING;
3033 #endif
3034
3035         /* Enable only WOL MAGIC by default */
3036         if (adapter->wol) {
3037                 ifp->if_capabilities |= IFCAP_WOL;
3038                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3039         }
3040                 
3041         /*
3042          * Specify the media types supported by this adapter and register
3043          * callbacks to update media and link information
3044          */
3045         ifmedia_init(&adapter->media, IFM_IMASK,
3046             em_media_change, em_media_status);
3047         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3048             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3049                 u_char fiber_type = IFM_1000_SX;        /* default type */
3050
3051                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3052                             0, NULL);
3053                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3054         } else {
3055                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3056                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3057                             0, NULL);
3058                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3059                             0, NULL);
3060                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3061                             0, NULL);
3062                 if (adapter->hw.phy.type != e1000_phy_ife) {
3063                         ifmedia_add(&adapter->media,
3064                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3065                         ifmedia_add(&adapter->media,
3066                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3067                 }
3068         }
3069         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3070         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3071         return (0);
3072 }
3073
3074
3075 /*
3076  * Manage DMA'able memory.
3077  */
3078 static void
3079 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3080 {
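        /*
         * The tags that use this callback are created with
         * nsegments == 1 (see em_dma_malloc below), so a successful
         * load always yields exactly one segment and segs[0].ds_addr
         * is the bus address of the whole allocation.
         */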
3081         if (error)
3082                 return;
3083         *(bus_addr_t *) arg = segs[0].ds_addr;
3084 }
3085
3086 static int
3087 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3088         struct em_dma_alloc *dma, int mapflags)
3089 {
3090         int error;
3091
3092         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3093                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3094                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3095                                 BUS_SPACE_MAXADDR,      /* highaddr */
3096                                 NULL, NULL,             /* filter, filterarg */
3097                                 size,                   /* maxsize */
3098                                 1,                      /* nsegments */
3099                                 size,                   /* maxsegsize */
3100                                 0,                      /* flags */
3101                                 NULL,                   /* lockfunc */
3102                                 NULL,                   /* lockarg */
3103                                 &dma->dma_tag);
3104         if (error) {
3105                 device_printf(adapter->dev,
3106                     "%s: bus_dma_tag_create failed: %d\n",
3107                     __func__, error);
3108                 goto fail_0;
3109         }
3110
3111         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3112             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3113         if (error) {
3114                 device_printf(adapter->dev,
3115                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3116                     __func__, (uintmax_t)size, error);
3117                 goto fail_2;
3118         }
3119
3120         dma->dma_paddr = 0;
3121         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3122             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3123         if (error || dma->dma_paddr == 0) {
3124                 device_printf(adapter->dev,
3125                     "%s: bus_dmamap_load failed: %d\n",
3126                     __func__, error);
3127                 goto fail_3;
3128         }
3129
3130         return (0);
3131
3132 fail_3:
3133         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3134 fail_2:
3135         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3136         bus_dma_tag_destroy(dma->dma_tag);
3137 fail_0:
3138         dma->dma_map = NULL;
3139         dma->dma_tag = NULL;
3140
3141         return (error);
3142 }
3143
3144 static void
3145 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3146 {
3147         if (dma->dma_tag == NULL)
3148                 return;
3149         if (dma->dma_map != NULL) {
3150                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3151                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3152                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3153                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3154                 dma->dma_map = NULL;
3155         }
3156         bus_dma_tag_destroy(dma->dma_tag);
3157         dma->dma_tag = NULL;
3158 }
3159
3160
3161 /*********************************************************************
3162  *
3163  *  Allocate memory for the transmit and receive rings, and then
3164  *  the descriptors associated with each, called only once at attach.
3165  *
3166  **********************************************************************/
3167 static int
3168 em_allocate_queues(struct adapter *adapter)
3169 {
3170         device_t                dev = adapter->dev;
3171         struct tx_ring          *txr = NULL;
3172         struct rx_ring          *rxr = NULL;
3173         int rsize, tsize, error = E1000_SUCCESS;
3174         int txconf = 0, rxconf = 0;
3175
3176
3177         /* Allocate the TX ring struct memory */
3178         if (!(adapter->tx_rings =
3179             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3180             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3181                 device_printf(dev, "Unable to allocate TX ring memory\n");
3182                 error = ENOMEM;
3183                 goto fail;
3184         }
3185
3186         /* Now allocate the RX */
3187         if (!(adapter->rx_rings =
3188             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3189             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3190                 device_printf(dev, "Unable to allocate RX ring memory\n");
3191                 error = ENOMEM;
3192                 goto rx_fail;
3193         }
3194
3195         tsize = roundup2(adapter->num_tx_desc *
3196             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3197         /*
3198          * Now set up the TX queues, txconf is needed to handle the
3199          * possibility that things fail midcourse and we need to
3200          * unwind the allocations gracefully
3201          */ 
3202         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3203                 /* Set up some basics */
3204                 txr = &adapter->tx_rings[i];
3205                 txr->adapter = adapter;
3206                 txr->me = i;
3207
3208                 /* Initialize the TX lock */
3209                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3210                     device_get_nameunit(dev), txr->me);
3211                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3212
3213                 if (em_dma_malloc(adapter, tsize,
3214                         &txr->txdma, BUS_DMA_NOWAIT)) {
3215                         device_printf(dev,
3216                             "Unable to allocate TX Descriptor memory\n");
3217                         error = ENOMEM;
3218                         goto err_tx_desc;
3219                 }
3220                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3221                 bzero((void *)txr->tx_base, tsize);
3222
3223                 if (em_allocate_transmit_buffers(txr)) {
3224                         device_printf(dev,
3225                             "Critical Failure setting up transmit buffers\n");
3226                         error = ENOMEM;
3227                         goto err_tx_desc;
3228                 }
3229 #if __FreeBSD_version >= 800000
3230                 /* Allocate a buf ring */
3231                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3232                     M_WAITOK, &txr->tx_mtx);
3233 #endif
3234         }
3235
3236         /*
3237          * Next the RX queues...
3238          */ 
3239         rsize = roundup2(adapter->num_rx_desc *
3240             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3241         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3242                 rxr = &adapter->rx_rings[i];
3243                 rxr->adapter = adapter;
3244                 rxr->me = i;
3245
3246                 /* Initialize the RX lock */
3247                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3248                     device_get_nameunit(dev), rxr->me);
3249                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3250
3251                 if (em_dma_malloc(adapter, rsize,
3252                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3253                         device_printf(dev,
3254                             "Unable to allocate RX Descriptor memory\n");
3255                         error = ENOMEM;
3256                         goto err_rx_desc;
3257                 }
3258                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3259                 bzero((void *)rxr->rx_base, rsize);
3260
3261                 /* Allocate receive buffers for the ring*/
3262                 if (em_allocate_receive_buffers(rxr)) {
3263                         device_printf(dev,
3264                             "Critical Failure setting up receive buffers\n");
3265                         error = ENOMEM;
3266                         goto err_rx_desc;
3267                 }
3268         }
3269
3270         return (0);
3271
3272 err_rx_desc:
3273         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3274                 em_dma_free(adapter, &rxr->rxdma);
3275 err_tx_desc:
3276         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3277                 em_dma_free(adapter, &txr->txdma);
3278         free(adapter->rx_rings, M_DEVBUF);
3279 rx_fail:
3280 #if __FreeBSD_version >= 800000
3281         if (txr != NULL && txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF);
3282 #endif
3283         free(adapter->tx_rings, M_DEVBUF);
3284 fail:
3285         return (error);
3286 }
3287
3288
3289 /*********************************************************************
3290  *
3291  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3292  *  the information needed to transmit a packet on the wire. This is
3293  *  called only once at attach, setup is done every reset.
3294  *
3295  **********************************************************************/
3296 static int
3297 em_allocate_transmit_buffers(struct tx_ring *txr)
3298 {
3299         struct adapter *adapter = txr->adapter;
3300         device_t dev = adapter->dev;
3301         struct em_buffer *txbuf;
3302         int error, i;
3303
3304         /*
3305          * Setup DMA descriptor areas.
3306          */
3307         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3308                                1, 0,                    /* alignment, bounds */
3309                                BUS_SPACE_MAXADDR,       /* lowaddr */
3310                                BUS_SPACE_MAXADDR,       /* highaddr */
3311                                NULL, NULL,              /* filter, filterarg */
3312                                EM_TSO_SIZE,             /* maxsize */
3313                                EM_MAX_SCATTER,          /* nsegments */
3314                                PAGE_SIZE,               /* maxsegsize */
3315                                0,                       /* flags */
3316                                NULL,                    /* lockfunc */
3317                                NULL,                    /* lockfuncarg */
3318                                &txr->txtag))) {
3319                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3320                 goto fail;
3321         }
3322
3323         if (!(txr->tx_buffers =
3324             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3325             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3326                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3327                 error = ENOMEM;
3328                 goto fail;
3329         }
3330
3331         /* Create the descriptor buffer dma maps */
3332         txbuf = txr->tx_buffers;
3333         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3334                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3335                 if (error != 0) {
3336                         device_printf(dev, "Unable to create TX DMA map\n");
3337                         goto fail;
3338                 }
3339         }
3340
3341         return 0;
3342 fail:
3343         /* Free everything; this handles the case where we failed midway */
3344         em_free_transmit_structures(adapter);
3345         return (error);
3346 }
3347
3348 /*********************************************************************
3349  *
3350  *  Initialize a transmit ring.
3351  *
3352  **********************************************************************/
3353 static void
3354 em_setup_transmit_ring(struct tx_ring *txr)
3355 {
3356         struct adapter *adapter = txr->adapter;
3357         struct em_buffer *txbuf;
3358         int i;
3359 #ifdef DEV_NETMAP
3360         struct netmap_adapter *na = NA(adapter->ifp);
3361         struct netmap_slot *slot;
3362 #endif /* DEV_NETMAP */
3363
3364         /* Clear the old descriptor contents */
3365         EM_TX_LOCK(txr);
3366 #ifdef DEV_NETMAP
3367         slot = netmap_reset(na, NR_TX, txr->me, 0);
3368 #endif /* DEV_NETMAP */
3369
3370         bzero((void *)txr->tx_base,
3371               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3372         /* Reset indices */
3373         txr->next_avail_desc = 0;
3374         txr->next_to_clean = 0;
3375
3376         /* Free any existing tx buffers. */
3377         txbuf = txr->tx_buffers;
3378         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3379                 if (txbuf->m_head != NULL) {
3380                         bus_dmamap_sync(txr->txtag, txbuf->map,
3381                             BUS_DMASYNC_POSTWRITE);
3382                         bus_dmamap_unload(txr->txtag, txbuf->map);
3383                         m_freem(txbuf->m_head);
3384                         txbuf->m_head = NULL;
3385                 }
3386 #ifdef DEV_NETMAP
3387                 if (slot) {
3388                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3389                         uint64_t paddr;
3390                         void *addr;
3391
3392                         addr = PNMB(na, slot + si, &paddr);
3393                         txr->tx_base[i].buffer_addr = htole64(paddr);
3394                         /* reload the map for netmap mode */
3395                         netmap_load_map(na, txr->txtag, txbuf->map, addr);
3396                 }
3397 #endif /* DEV_NETMAP */
3398
3399                 /* clear the watch index */
3400                 txbuf->next_eop = -1;
3401         }
3402
3403         /* Set number of descriptors available */
3404         txr->tx_avail = adapter->num_tx_desc;
3405         txr->queue_status = EM_QUEUE_IDLE;
3406
3407         /* Clear checksum offload context. */
3408         txr->last_hw_offload = 0;
3409         txr->last_hw_ipcss = 0;
3410         txr->last_hw_ipcso = 0;
3411         txr->last_hw_tucss = 0;
3412         txr->last_hw_tucso = 0;
3413
3414         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3415             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3416         EM_TX_UNLOCK(txr);
3417 }
3418
3419 /*********************************************************************
3420  *
3421  *  Initialize all transmit rings.
3422  *
3423  **********************************************************************/
3424 static void
3425 em_setup_transmit_structures(struct adapter *adapter)
3426 {
3427         struct tx_ring *txr = adapter->tx_rings;
3428
3429         for (int i = 0; i < adapter->num_queues; i++, txr++)
3430                 em_setup_transmit_ring(txr);
3431
3432         return;
3433 }
3434
3435 /*********************************************************************
3436  *
3437  *  Enable transmit unit.
3438  *
3439  **********************************************************************/
3440 static void
3441 em_initialize_transmit_unit(struct adapter *adapter)
3442 {
3443         struct tx_ring  *txr = adapter->tx_rings;
3444         struct e1000_hw *hw = &adapter->hw;
3445         u32     tctl, tarc, tipg = 0;
3446
3447         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3448
3449         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3450                 u64 bus_addr = txr->txdma.dma_paddr;
3451                 /* Base and Len of TX Ring */
3452                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3453                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3454                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3455                     (u32)(bus_addr >> 32));
3456                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3457                     (u32)bus_addr);
3458                 /* Init the HEAD/TAIL indices */
3459                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3460                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3461
3462                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3463                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3464                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3465
3466                 txr->queue_status = EM_QUEUE_IDLE;
3467         }
3468
3469         /* Set the default values for the Tx Inter Packet Gap timer */
3470         switch (adapter->hw.mac.type) {
3471         case e1000_80003es2lan:
3472                 tipg = DEFAULT_82543_TIPG_IPGR1;
3473                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3474                     E1000_TIPG_IPGR2_SHIFT;
3475                 break;
3476         default:
3477                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3478                     (adapter->hw.phy.media_type ==
3479                     e1000_media_type_internal_serdes))
3480                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3481                 else
3482                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3483                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3484                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3485         }
3486
3487         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3488         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3489
3490         if (adapter->hw.mac.type >= e1000_82540)
3491                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3492                     adapter->tx_abs_int_delay.value);
3493
3494         if ((adapter->hw.mac.type == e1000_82571) ||
3495             (adapter->hw.mac.type == e1000_82572)) {
3496                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3497                 tarc |= SPEED_MODE_BIT;
3498                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3499         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3500                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3501                 tarc |= 1;
3502                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3503                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3504                 tarc |= 1;
3505                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3506         }
3507
3508         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3509         if (adapter->tx_int_delay.value > 0)
3510                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3511
3512         /* Program the Transmit Control Register */
3513         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3514         tctl &= ~E1000_TCTL_CT;
3515         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3516                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3517
3518         if (adapter->hw.mac.type >= e1000_82571)
3519                 tctl |= E1000_TCTL_MULR;
3520
3521         /* This write will effectively turn on the transmit unit. */
3522         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3523
3524 }
3525
3526
3527 /*********************************************************************
3528  *
3529  *  Free all transmit rings.
3530  *
3531  **********************************************************************/
3532 static void
3533 em_free_transmit_structures(struct adapter *adapter)
3534 {
3535         struct tx_ring *txr = adapter->tx_rings;
3536
3537         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3538                 EM_TX_LOCK(txr);
3539                 em_free_transmit_buffers(txr);
3540                 em_dma_free(adapter, &txr->txdma);
3541                 EM_TX_UNLOCK(txr);
3542                 EM_TX_LOCK_DESTROY(txr);
3543         }
3544
3545         free(adapter->tx_rings, M_DEVBUF);
3546 }
3547
3548 /*********************************************************************
3549  *
3550  *  Free transmit ring related data structures.
3551  *
3552  **********************************************************************/
3553 static void
3554 em_free_transmit_buffers(struct tx_ring *txr)
3555 {
3556         struct adapter          *adapter = txr->adapter;
3557         struct em_buffer        *txbuf;
3558
3559         INIT_DEBUGOUT("free_transmit_ring: begin");
3560
3561         if (txr->tx_buffers == NULL)
3562                 return;
3563
3564         for (int i = 0; i < adapter->num_tx_desc; i++) {
3565                 txbuf = &txr->tx_buffers[i];
3566                 if (txbuf->m_head != NULL) {
3567                         bus_dmamap_sync(txr->txtag, txbuf->map,
3568                             BUS_DMASYNC_POSTWRITE);
3569                         bus_dmamap_unload(txr->txtag,
3570                             txbuf->map);
3571                         m_freem(txbuf->m_head);
3572                         txbuf->m_head = NULL;
3573                         if (txbuf->map != NULL) {
3574                                 bus_dmamap_destroy(txr->txtag,
3575                                     txbuf->map);
3576                                 txbuf->map = NULL;
3577                         }
3578                 } else if (txbuf->map != NULL) {
3579                         bus_dmamap_unload(txr->txtag,
3580                             txbuf->map);
3581                         bus_dmamap_destroy(txr->txtag,
3582                             txbuf->map);
3583                         txbuf->map = NULL;
3584                 }
3585         }
3586 #if __FreeBSD_version >= 800000
3587         if (txr->br != NULL)
3588                 buf_ring_free(txr->br, M_DEVBUF);
3589 #endif
3590         if (txr->tx_buffers != NULL) {
3591                 free(txr->tx_buffers, M_DEVBUF);
3592                 txr->tx_buffers = NULL;
3593         }
3594         if (txr->txtag != NULL) {
3595                 bus_dma_tag_destroy(txr->txtag);
3596                 txr->txtag = NULL;
3597         }
3598         return;
3599 }
3600
3601
3602 /*********************************************************************
3603  *  The offload context is protocol specific (TCP/UDP) and thus
3604  *  only needs to be set when the protocol changes. The occasion
3605  *  of a context change can be a performance detriment, and it
3606  *  might be better just disabled. The reason arises in the way
3607  *  in which the controller supports pipelined requests from the
3608  *  Tx data DMA. Up to four requests can be pipelined, and they may
3609  *  belong to the same packet or to multiple packets. However, all
3610  *  requests for one packet are issued before a request is issued
3611  *  for a subsequent packet, and if a request for the next packet
3612  *  requires a context change, that request will be stalled
3613  *  until the previous request completes. This means setting up
3614  *  a new context effectively disables pipelined Tx data DMA,
3615  *  which in turn greatly slows down performance when sending
3616  *  small-sized frames.
3617  **********************************************************************/
3618 static void
3619 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3620     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3621 {
3622         struct adapter                  *adapter = txr->adapter;
3623         struct e1000_context_desc       *TXD = NULL;
3624         struct em_buffer                *tx_buffer;
3625         int                             cur, hdr_len;
3626         u32                             cmd = 0;
3627         u16                             offload = 0;
3628         u8                              ipcso, ipcss, tucso, tucss;
3629
3630         ipcss = ipcso = tucss = tucso = 0;
3631         hdr_len = ip_off + (ip->ip_hl << 2);
3632         cur = txr->next_avail_desc;
3633
3634         /* Setup of IP header checksum. */
3635         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3636                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3637                 offload |= CSUM_IP;
3638                 ipcss = ip_off;
3639                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3640                 /*
3641                  * Start offset for header checksum calculation.
3642                  * End offset for header checksum calculation.
3643                  * Offset of place to put the checksum.
3644                  */
3645                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3646                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3647                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3648                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3649                 cmd |= E1000_TXD_CMD_IP;
3650         }
3651
3652         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3653                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3654                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3655                 offload |= CSUM_TCP;
3656                 tucss = hdr_len;
3657                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3658                 /*
3659                  * Setting up a new checksum offload context for every frame
3660                  * takes a lot of processing time for the hardware. This also
3661                  * reduces performance a lot for small-sized frames, so avoid
3662                  * it if the driver can reuse the previously configured
3663                  * checksum offload context.
3664                  */
3665                 if (txr->last_hw_offload == offload) {
3666                         if (offload & CSUM_IP) {
3667                                 if (txr->last_hw_ipcss == ipcss &&
3668                                     txr->last_hw_ipcso == ipcso &&
3669                                     txr->last_hw_tucss == tucss &&
3670                                     txr->last_hw_tucso == tucso)
3671                                         return;
3672                         } else {
3673                                 if (txr->last_hw_tucss == tucss &&
3674                                     txr->last_hw_tucso == tucso)
3675                                         return;
3676                         }
3677                 }
3678                 txr->last_hw_offload = offload;
3679                 txr->last_hw_tucss = tucss;
3680                 txr->last_hw_tucso = tucso;
3681                 /*
3682                  * Start offset for payload checksum calculation.
3683                  * End offset for payload checksum calculation.
3684                  * Offset of place to put the checksum.
3685                  */
3686                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3687                 TXD->upper_setup.tcp_fields.tucss = tucss;
3688                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3689                 TXD->upper_setup.tcp_fields.tucso = tucso;
3690                 cmd |= E1000_TXD_CMD_TCP;
3691         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3692                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3693                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3694                 tucss = hdr_len;
3695                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3696                 /*
3697                  * Setting up a new checksum offload context for every frame
3698                  * takes a lot of processing time for the hardware. This also
3699                  * reduces performance a lot for small-sized frames, so avoid
3700                  * it if the driver can reuse the previously configured
3701                  * checksum offload context.
3702                  */
3703                 if (txr->last_hw_offload == offload) {
3704                         if (offload & CSUM_IP) {
3705                                 if (txr->last_hw_ipcss == ipcss &&
3706                                     txr->last_hw_ipcso == ipcso &&
3707                                     txr->last_hw_tucss == tucss &&
3708                                     txr->last_hw_tucso == tucso)
3709                                         return;
3710                         } else {
3711                                 if (txr->last_hw_tucss == tucss &&
3712                                     txr->last_hw_tucso == tucso)
3713                                         return;
3714                         }
3715                 }
3716                 txr->last_hw_offload = offload;
3717                 txr->last_hw_tucss = tucss;
3718                 txr->last_hw_tucso = tucso;
3719                 /*
3720                  * Start offset for header checksum calculation.
3721                  * End offset for header checksum calculation.
3722                  * Offset of place to put the checksum.
3723                  */
3724                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3725                 TXD->upper_setup.tcp_fields.tucss = tucss;
3726                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3727                 TXD->upper_setup.tcp_fields.tucso = tucso;
3728         }
3729   
3730         if (offload & CSUM_IP) {
3731                 txr->last_hw_ipcss = ipcss;
3732                 txr->last_hw_ipcso = ipcso;
3733         }
3734
3735         TXD->tcp_seg_setup.data = htole32(0);
3736         TXD->cmd_and_length =
3737             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3738         tx_buffer = &txr->tx_buffers[cur];
3739         tx_buffer->m_head = NULL;
3740         tx_buffer->next_eop = -1;
3741
3742         if (++cur == adapter->num_tx_desc)
3743                 cur = 0;
3744
3745         txr->tx_avail--;
3746         txr->next_avail_desc = cur;
3747 }
3748
3749
3750 /**********************************************************************
3751  *
3752  *  Setup work for hardware segmentation offload (TSO)
3753  *
3754  **********************************************************************/
3755 static void
3756 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3757     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3758 {
3759         struct adapter                  *adapter = txr->adapter;
3760         struct e1000_context_desc       *TXD;
3761         struct em_buffer                *tx_buffer;
3762         int cur, hdr_len;
3763
3764         /*
3765          * In theory we can use the same TSO context if and only if
3766          * the frame is the same type (IP/TCP) and has the same MSS.
3767          * However, checking whether a frame has the same IP/TCP
3768          * structure is hard, so just ignore that and always
3769          * re-establish a new TSO context.
3770          */
3771         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
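        /*
         * Worked example (hypothetical frame, not from this source):
         * with ip_off = 14 (plain Ethernet header), ip_hl = 5 (20-byte
         * IP header, no options) and th_off = 5 (20-byte TCP header):
         *
         *   hdr_len = 14 + (5 << 2) + (5 << 2) = 54 bytes
         */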
3772         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3773                       E1000_TXD_DTYP_D |        /* Data descr type */
3774                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3775
3776         /* IP and/or TCP header checksum calculation and insertion. */
3777         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3778
3779         cur = txr->next_avail_desc;
3780         tx_buffer = &txr->tx_buffers[cur];
3781         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3782
3783         /*
3784          * Start offset for header checksum calculation.
3785          * End offset for header checksum calculation.
3786          * Offset of place to put the checksum.
3787          */
3788         TXD->lower_setup.ip_fields.ipcss = ip_off;
3789         TXD->lower_setup.ip_fields.ipcse =
3790             htole16(ip_off + (ip->ip_hl << 2) - 1);
3791         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3792         /*
3793          * Start offset for payload checksum calculation.
3794          * End offset for payload checksum calculation.
3795          * Offset of place to put the checksum.
3796          */
3797         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3798         TXD->upper_setup.tcp_fields.tucse = 0;
3799         TXD->upper_setup.tcp_fields.tucso =
3800             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3801         /*
3802          * Payload size per packet w/o any headers.
3803          * Length of all headers up to payload.
3804          */
3805         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3806         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3807
3808         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3809                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3810                                 E1000_TXD_CMD_TSE |     /* TSE context */
3811                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3812                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3813                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3814
3815         tx_buffer->m_head = NULL;
3816         tx_buffer->next_eop = -1;
3817
3818         if (++cur == adapter->num_tx_desc)
3819                 cur = 0;
3820
3821         txr->tx_avail--;
3822         txr->next_avail_desc = cur;
3823         txr->tx_tso = TRUE;
3824 }
3825
3826
3827 /**********************************************************************
3828  *
3829  *  Examine each tx_buffer in the used queue. If the hardware is done
3830  *  processing the packet then free associated resources. The
3831  *  tx_buffer is put back on the free queue.
3832  *
3833  **********************************************************************/
3834 static void
3835 em_txeof(struct tx_ring *txr)
3836 {
3837         struct adapter  *adapter = txr->adapter;
3838         int first, last, done, processed;
3839         struct em_buffer *tx_buffer;
3840         struct e1000_tx_desc   *tx_desc, *eop_desc;
3841         struct ifnet   *ifp = adapter->ifp;
3842
3843         EM_TX_LOCK_ASSERT(txr);
3844 #ifdef DEV_NETMAP
3845         if (netmap_tx_irq(ifp, txr->me))
3846                 return;
3847 #endif /* DEV_NETMAP */
3848
3849         /* No work, make sure watchdog is off */
3850         if (txr->tx_avail == adapter->num_tx_desc) {
3851                 txr->queue_status = EM_QUEUE_IDLE;
3852                 return;
3853         }
3854
3855         processed = 0;
3856         first = txr->next_to_clean;
3857         tx_desc = &txr->tx_base[first];
3858         tx_buffer = &txr->tx_buffers[first];
3859         last = tx_buffer->next_eop;
3860         eop_desc = &txr->tx_base[last];
3861
3862         /*
3863          * What this does is get the index of the
3864          * first descriptor AFTER the EOP of the 
3865          * first packet; that way we can do a
3866          * simple comparison in the inner while loop.
3867          */
3868         if (++last == adapter->num_tx_desc)
3869                 last = 0;
3870         done = last;
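        /*
         * Illustrative walk-through (hypothetical values): with 1024
         * descriptors, first = 1020 and an EOP at 1022, last becomes
         * 1023 after the increment, so done = 1023 and the inner loop
         * below cleans slots 1020-1022 before looking for the next EOP.
         */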
3871
3872         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3873             BUS_DMASYNC_POSTREAD);
3874
3875         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3876                 /* We clean the range of the packet */
3877                 while (first != done) {
3878                         tx_desc->upper.data = 0;
3879                         tx_desc->lower.data = 0;
3880                         tx_desc->buffer_addr = 0;
3881                         ++txr->tx_avail;
3882                         ++processed;
3883
3884                         if (tx_buffer->m_head) {
3885                                 bus_dmamap_sync(txr->txtag,
3886                                     tx_buffer->map,
3887                                     BUS_DMASYNC_POSTWRITE);
3888                                 bus_dmamap_unload(txr->txtag,
3889                                     tx_buffer->map);
3890                                 m_freem(tx_buffer->m_head);
3891                                 tx_buffer->m_head = NULL;
3892                         }
3893                         tx_buffer->next_eop = -1;
3894                         txr->watchdog_time = ticks;
3895
3896                         if (++first == adapter->num_tx_desc)
3897                                 first = 0;
3898
3899                         tx_buffer = &txr->tx_buffers[first];
3900                         tx_desc = &txr->tx_base[first];
3901                 }
3902                 ++ifp->if_opackets;
3903                 /* See if we can continue to the next packet */
3904                 last = tx_buffer->next_eop;
3905                 if (last != -1) {
3906                         eop_desc = &txr->tx_base[last];
3907                         /* Get new done point */
3908                         if (++last == adapter->num_tx_desc) last = 0;
3909                         done = last;
3910                 } else
3911                         break;
3912         }
3913         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3914             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3915
3916         txr->next_to_clean = first;
3917
3918         /*
3919         ** Watchdog calculation: we know there's
3920         ** work outstanding or the first return
3921         ** would have been taken, so nothing processed
3922         ** for too long indicates a hang. The local timer
3923         ** will examine this and do a reset if needed.
3924         */
3925         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3926                 txr->queue_status = EM_QUEUE_HUNG;
3927
3928         /*
3929          * If we have a minimum free, clear IFF_DRV_OACTIVE
3930          * to tell the stack that it is OK to send packets.
3931          * Notice that all writes of OACTIVE happen under the
3932          * TX lock which, with a single queue, guarantees 
3933          * sanity.
3934          */
3935         if (txr->tx_avail >= EM_MAX_SCATTER)
3936                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3937
3938         /* Disable watchdog if all clean */
3939         if (txr->tx_avail == adapter->num_tx_desc) {
3940                 txr->queue_status = EM_QUEUE_IDLE;
3941         } 
3942 }
3943
3944
3945 /*********************************************************************
3946  *
3947  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3948  *
3949  **********************************************************************/
3950 static void
3951 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3952 {
3953         struct adapter          *adapter = rxr->adapter;
3954         struct mbuf             *m;
3955         bus_dma_segment_t       segs[1];
3956         struct em_buffer        *rxbuf;
3957         int                     i, j, error, nsegs;
3958         bool                    cleaned = FALSE;
3959
3960         i = j = rxr->next_to_refresh;
3961         /*
3962         ** Get one descriptor beyond
3963         ** our work mark to control
3964         ** the loop.
3965         */
3966         if (++j == adapter->num_rx_desc)
3967                 j = 0;
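        /*
        ** Illustrative values: with 256 descriptors and
        ** next_to_refresh = 255, i starts at 255 while j wraps to 0;
        ** each pass refreshes slot i, then both indices leapfrog
        ** forward until j reaches the caller-supplied limit.
        */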
3968
3969         while (j != limit) {
3970                 rxbuf = &rxr->rx_buffers[i];
3971                 if (rxbuf->m_head == NULL) {
3972                         m = m_getjcl(M_NOWAIT, MT_DATA,
3973                             M_PKTHDR, adapter->rx_mbuf_sz);
3974                         /*
3975                         ** If we have a temporary resource shortage
3976                         ** that causes a failure, just abort refresh
3977                         ** for now; we will return to this point when
3978                         ** reinvoked from em_rxeof.
3979                         */
3980                         if (m == NULL)
3981                                 goto update;
3982                 } else
3983                         m = rxbuf->m_head;
3984
3985                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3986                 m->m_flags |= M_PKTHDR;
3987                 m->m_data = m->m_ext.ext_buf;
3988
3989                 /* Use bus_dma machinery to setup the memory mapping  */
3990                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3991                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3992                 if (error != 0) {
3993                         printf("Refresh mbufs: hdr dmamap load"
3994                             " failure - %d\n", error);
3995                         m_free(m);
3996                         rxbuf->m_head = NULL;
3997                         goto update;
3998                 }
3999                 rxbuf->m_head = m;
4000                 bus_dmamap_sync(rxr->rxtag,
4001                     rxbuf->map, BUS_DMASYNC_PREREAD);
4002                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4003                 cleaned = TRUE;
4004
4005                 i = j; /* Next is precalculated for us */
4006                 rxr->next_to_refresh = i;
4007                 /* Calculate next controlling index */
4008                 if (++j == adapter->num_rx_desc)
4009                         j = 0;
4010         }
4011 update:
4012         /*
4013         ** Update the tail pointer only if,
4014         ** and only as far as, we have refreshed.
4015         */
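        /*
        ** Ownership note: the NIC consumes descriptors from its head
        ** (RDH) up to, but not including, this tail (RDT), so the tail
        ** must only advance over slots that now carry a valid mbuf.
        */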
4016         if (cleaned)
4017                 E1000_WRITE_REG(&adapter->hw,
4018                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4019
4020         return;
4021 }
4022
4023
4024 /*********************************************************************
4025  *
4026  *  Allocate memory for rx_buffer structures. Since we use one
4027  *  rx_buffer per received packet, the maximum number of rx_buffer's
4028  *  that we'll need is equal to the number of receive descriptors
4029  *  that we've allocated.
4030  *
4031  **********************************************************************/
4032 static int
4033 em_allocate_receive_buffers(struct rx_ring *rxr)
4034 {
4035         struct adapter          *adapter = rxr->adapter;
4036         device_t                dev = adapter->dev;
4037         struct em_buffer        *rxbuf;
4038         int                     error;
4039
4040         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4041             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4042         if (rxr->rx_buffers == NULL) {
4043                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4044                 return (ENOMEM);
4045         }
4046
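        /*
        ** Size the tag for the largest cluster we may use (a 9k
        ** jumbo); the buffers actually loaded are adapter->rx_mbuf_sz
        ** bytes, which may be smaller and always fit in one segment.
        */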
4047         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4048                                 1, 0,                   /* alignment, bounds */
4049                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4050                                 BUS_SPACE_MAXADDR,      /* highaddr */
4051                                 NULL, NULL,             /* filter, filterarg */
4052                                 MJUM9BYTES,             /* maxsize */
4053                                 1,                      /* nsegments */
4054                                 MJUM9BYTES,             /* maxsegsize */
4055                                 0,                      /* flags */
4056                                 NULL,                   /* lockfunc */
4057                                 NULL,                   /* lockarg */
4058                                 &rxr->rxtag);
4059         if (error) {
4060                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4061                     __func__, error);
4062                 goto fail;
4063         }
4064
4066         for (int i = 0; i < adapter->num_rx_desc; i++) {
4067                 rxbuf = &rxr->rx_buffers[i];
4068                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4069                 if (error) {
4070                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4071                             __func__, error);
4072                         goto fail;
4073                 }
4074         }
4075
4076         return (0);
4077
4078 fail:
4079         em_free_receive_structures(adapter);
4080         return (error);
4081 }
4082
4083
4084 /*********************************************************************
4085  *
4086  *  Initialize a receive ring and its buffers.
4087  *
4088  **********************************************************************/
4089 static int
4090 em_setup_receive_ring(struct rx_ring *rxr)
4091 {
4092         struct  adapter         *adapter = rxr->adapter;
4093         struct em_buffer        *rxbuf;
4094         bus_dma_segment_t       seg[1];
4095         int                     rsize, nsegs, error = 0;
4096 #ifdef DEV_NETMAP
4097         struct netmap_adapter *na = NA(adapter->ifp);
4098         struct netmap_slot *slot;
4099 #endif
4100
4101
4102         /* Clear the ring contents */
4103         EM_RX_LOCK(rxr);
4104         rsize = roundup2(adapter->num_rx_desc *
4105             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4106         bzero((void *)rxr->rx_base, rsize);
4107 #ifdef DEV_NETMAP
4108         slot = netmap_reset(na, NR_RX, 0, 0);
4109 #endif
4110
4111         /*
4112         ** Free current RX buffer structs and their mbufs
4113         */
4114         for (int i = 0; i < adapter->num_rx_desc; i++) {
4115                 rxbuf = &rxr->rx_buffers[i];
4116                 if (rxbuf->m_head != NULL) {
4117                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4118                             BUS_DMASYNC_POSTREAD);
4119                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4120                         m_freem(rxbuf->m_head);
4121                         rxbuf->m_head = NULL; /* mark as freed */
4122                 }
4123         }
4124
4125         /* Now replenish the mbufs */
4126         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4127                 rxbuf = &rxr->rx_buffers[j];
4128 #ifdef DEV_NETMAP
4129                 if (slot) {
4130                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4131                         uint64_t paddr;
4132                         void *addr;
4133
4134                         addr = PNMB(na, slot + si, &paddr);
4135                         netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4136                         /* Update descriptor */
4137                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4138                         continue;
4139                 }
4140 #endif /* DEV_NETMAP */
4141                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4142                     M_PKTHDR, adapter->rx_mbuf_sz);
4143                 if (rxbuf->m_head == NULL) {
4144                         error = ENOBUFS;
4145                         goto fail;
4146                 }
4147                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4148                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4149                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4150
4151                 /* Get the memory mapping */
4152                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4153                     rxbuf->map, rxbuf->m_head, seg,
4154                     &nsegs, BUS_DMA_NOWAIT);
4155                 if (error != 0) {
4156                         m_freem(rxbuf->m_head);
4157                         rxbuf->m_head = NULL;
4158                         goto fail;
4159                 }
4160                 bus_dmamap_sync(rxr->rxtag,
4161                     rxbuf->map, BUS_DMASYNC_PREREAD);
4162
4163                 /* Update descriptor */
4164                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4165         }
4166         rxr->next_to_check = 0;
4167         rxr->next_to_refresh = 0;
4168         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4169             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4170
4171 fail:
4172         EM_RX_UNLOCK(rxr);
4173         return (error);
4174 }
4175
4176 /*********************************************************************
4177  *
4178  *  Initialize all receive rings.
4179  *
4180  **********************************************************************/
4181 static int
4182 em_setup_receive_structures(struct adapter *adapter)
4183 {
4184         struct rx_ring *rxr = adapter->rx_rings;
4185         int q;
4186
4187         for (q = 0; q < adapter->num_queues; q++, rxr++)
4188                 if (em_setup_receive_ring(rxr))
4189                         goto fail;
4190
4191         return (0);
4192 fail:
4193         /*
4194          * Free the RX buffers allocated so far. We need only handle
4195          * the rings that completed; the failing ring will have
4196          * cleaned up after itself. 'q' failed, so it is the terminus.
4197          */
4198         for (int i = 0; i < q; ++i) {
4199                 rxr = &adapter->rx_rings[i];
4200                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4201                         struct em_buffer *rxbuf;
4202                         rxbuf = &rxr->rx_buffers[n];
4203                         if (rxbuf->m_head != NULL) {
4204                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4205                                   BUS_DMASYNC_POSTREAD);
4206                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4207                                 m_freem(rxbuf->m_head);
4208                                 rxbuf->m_head = NULL;
4209                         }
4210                 }
4211                 rxr->next_to_check = 0;
4212                 rxr->next_to_refresh = 0;
4213         }
4214
4215         return (ENOBUFS);
4216 }
4217
4218 /*********************************************************************
4219  *
4220  *  Free all receive rings.
4221  *
4222  **********************************************************************/
4223 static void
4224 em_free_receive_structures(struct adapter *adapter)
4225 {
4226         struct rx_ring *rxr = adapter->rx_rings;
4227
4228         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4229                 em_free_receive_buffers(rxr);
4230                 /* Free the ring memory as well */
4231                 em_dma_free(adapter, &rxr->rxdma);
4232                 EM_RX_LOCK_DESTROY(rxr);
4233         }
4234
4235         free(adapter->rx_rings, M_DEVBUF);
4236 }
4237
4238
4239 /*********************************************************************
4240  *
4241  *  Free receive ring data structures
4242  *
4243  **********************************************************************/
4244 static void
4245 em_free_receive_buffers(struct rx_ring *rxr)
4246 {
4247         struct adapter          *adapter = rxr->adapter;
4248         struct em_buffer        *rxbuf = NULL;
4249
4250         INIT_DEBUGOUT("free_receive_buffers: begin");
4251
4252         if (rxr->rx_buffers != NULL) {
4253                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4254                         rxbuf = &rxr->rx_buffers[i];
4255                         if (rxbuf->map != NULL) {
4256                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4257                                     BUS_DMASYNC_POSTREAD);
4258                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4259                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4260                         }
4261                         if (rxbuf->m_head != NULL) {
4262                                 m_freem(rxbuf->m_head);
4263                                 rxbuf->m_head = NULL;
4264                         }
4265                 }
4266                 free(rxr->rx_buffers, M_DEVBUF);
4267                 rxr->rx_buffers = NULL;
4268                 rxr->next_to_check = 0;
4269                 rxr->next_to_refresh = 0;
4270         }
4271
4272         if (rxr->rxtag != NULL) {
4273                 bus_dma_tag_destroy(rxr->rxtag);
4274                 rxr->rxtag = NULL;
4275         }
4276
4277         return;
4278 }
4279
4280
4281 /*********************************************************************
4282  *
4283  *  Enable receive unit.
4284  *
4285  **********************************************************************/
4286
4287 static void
4288 em_initialize_receive_unit(struct adapter *adapter)
4289 {
4290         struct rx_ring  *rxr = adapter->rx_rings;
4291         struct ifnet    *ifp = adapter->ifp;
4292         struct e1000_hw *hw = &adapter->hw;
4293         u64     bus_addr;
4294         u32     rctl, rxcsum;
4295
4296         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4297
4298         /*
4299          * Make sure receives are disabled while setting
4300          * up the descriptor ring
4301          */
4302         rctl = E1000_READ_REG(hw, E1000_RCTL);
4303         /* On 82574/82583 the receiver must never be disabled once enabled */
4304         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4305                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4306
4307         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4308             adapter->rx_abs_int_delay.value);
4309         /*
4310          * Set the interrupt throttling rate. Value is calculated
4311          * as DEFAULT_ITR = 1s / (MAX_INTS_PER_SEC * 256ns)
4312          */
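        /*
        ** A worked example, assuming the driver default of
        ** MAX_INTS_PER_SEC = 8000: the ITR register counts in 256ns
        ** units, so DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488, i.e.
        ** at most one interrupt every ~125us.
        */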
4313         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4314
4315         /*
4316         ** When using MSIX interrupts we need to throttle
4317         ** using the EITR register (82574 only)
4318         */
4319         if (hw->mac.type == e1000_82574) {
4320                 for (int i = 0; i < 4; i++)
4321                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4322                             DEFAULT_ITR);
4323                 /* Disable accelerated acknowledge */
4324                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4325         }
4326
4327         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4328         if (ifp->if_capenable & IFCAP_RXCSUM)
4329                 rxcsum |= E1000_RXCSUM_TUOFL;
4330         else
4331                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4332         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4333
4334         /*
4335         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4336         ** long latencies are observed, like Lenovo X60. This
4337         ** change eliminates the problem, but since having positive
4338         ** values in RDTR is a known source of problems on other
4339         ** platforms another solution is being sought.
4340         */
4341         if (hw->mac.type == e1000_82573)
4342                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4343
4344         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4345                 /* Setup the Base and Length of the Rx Descriptor Ring */
4346                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4347
4348                 bus_addr = rxr->rxdma.dma_paddr;
4349                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4350                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4351                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4352                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4353                 /* Setup the Head and Tail Descriptor Pointers */
4354                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4355 #ifdef DEV_NETMAP
4356                 /*
4357                  * an init() while a netmap client is active must
4358                  * preserve the rx buffers passed to userspace.
4359                  */
4360                 if (ifp->if_capenable & IFCAP_NETMAP)
4361                         rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4362 #endif /* DEV_NETMAP */
4363                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4364         }
4365
4366         /* Set PTHRESH for improved jumbo performance */
4367         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4368             (adapter->hw.mac.type == e1000_pch2lan) ||
4369             (adapter->hw.mac.type == e1000_ich10lan)) &&
4370             (ifp->if_mtu > ETHERMTU)) {
4371                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4372                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4373         }
4374                 
4375         if (adapter->hw.mac.type >= e1000_pch2lan) {
4376                 if (ifp->if_mtu > ETHERMTU)
4377                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4378                 else
4379                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4380         }
4381
4382         /* Setup the Receive Control Register */
4383         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4384         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4385             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4386             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4387
4388         /* Strip the CRC */
4389         rctl |= E1000_RCTL_SECRC;
4390
4391         /* Make sure VLAN Filters are off */
4392         rctl &= ~E1000_RCTL_VFE;
4393         rctl &= ~E1000_RCTL_SBP;
4394
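        /*
        ** Buffer size encoding: with BSEX clear the BSIZE field is
        ** taken as-is (2048 here); with BSEX set the sizes scale by
        ** 16, so SZ_4096 and SZ_8192 select the jumbo buffer sizes.
        */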
4395         if (adapter->rx_mbuf_sz == MCLBYTES)
4396                 rctl |= E1000_RCTL_SZ_2048;
4397         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4398                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4399         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4400                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4401
4402         if (ifp->if_mtu > ETHERMTU)
4403                 rctl |= E1000_RCTL_LPE;
4404         else
4405                 rctl &= ~E1000_RCTL_LPE;
4406
4407         /* Write out the settings */
4408         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4409
4410         return;
4411 }
4412
4413
4414 /*********************************************************************
4415  *
4416  *  This routine executes in interrupt context. It replenishes
4417  *  the mbufs in the descriptor ring and sends data that has been
4418  *  DMA'ed into host memory up to the upper layer.
4419  *
4420  *  We loop at most count times if count is > 0, or until done if
4421  *  count < 0.
4422  *  
4423  *  For polling we also now return the number of cleaned packets
4424  *********************************************************************/
4425 static bool
4426 em_rxeof(struct rx_ring *rxr, int count, int *done)
4427 {
4428         struct adapter          *adapter = rxr->adapter;
4429         struct ifnet            *ifp = adapter->ifp;
4430         struct mbuf             *mp, *sendmp;
4431         u8                      status = 0;
4432         u16                     len;
4433         int                     i, processed, rxdone = 0;
4434         bool                    eop;
4435         struct e1000_rx_desc    *cur;
4436
4437         EM_RX_LOCK(rxr);
4438
4439 #ifdef DEV_NETMAP
4440         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4441                 EM_RX_UNLOCK(rxr);
4442                 return (FALSE);
4443         }
4444 #endif /* DEV_NETMAP */
4445
4446         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4447
4448                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4449                         break;
4450
4451                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4452                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4453
4454                 cur = &rxr->rx_base[i];
4455                 status = cur->status;
4456                 mp = sendmp = NULL;
4457
4458                 if ((status & E1000_RXD_STAT_DD) == 0)
4459                         break;
4460
4461                 len = le16toh(cur->length);
4462                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4463
4464                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4465                     (rxr->discard == TRUE)) {
4466                         adapter->dropped_pkts++;
4467                         ++rxr->rx_discarded;
4468                         if (!eop) /* Catch subsequent segs */
4469                                 rxr->discard = TRUE;
4470                         else
4471                                 rxr->discard = FALSE;
4472                         em_rx_discard(rxr, i);
4473                         goto next_desc;
4474                 }
4475                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4476
4477                 /* Assign correct length to the current fragment */
4478                 mp = rxr->rx_buffers[i].m_head;
4479                 mp->m_len = len;
4480
4481                 /* Trigger for refresh */
4482                 rxr->rx_buffers[i].m_head = NULL;
4483
4484                 /* First segment? */
4485                 if (rxr->fmp == NULL) {
4486                         mp->m_pkthdr.len = len;
4487                         rxr->fmp = rxr->lmp = mp;
4488                 } else {
4489                         /* Chain mbuf's together */
4490                         mp->m_flags &= ~M_PKTHDR;
4491                         rxr->lmp->m_next = mp;
4492                         rxr->lmp = mp;
4493                         rxr->fmp->m_pkthdr.len += len;
4494                 }
4495
4496                 if (eop) {
4497                         --count;
4498                         sendmp = rxr->fmp;
4499                         sendmp->m_pkthdr.rcvif = ifp;
4500                         ifp->if_ipackets++;
4501                         em_receive_checksum(cur, sendmp);
4502 #ifndef __NO_STRICT_ALIGNMENT
4503                         if (adapter->hw.mac.max_frame_size >
4504                             (MCLBYTES - ETHER_ALIGN) &&
4505                             em_fixup_rx(rxr) != 0)
4506                                 goto skip;
4507 #endif
4508                         if (status & E1000_RXD_STAT_VP) {
4509                                 sendmp->m_pkthdr.ether_vtag =
4510                                     le16toh(cur->special);
4511                                 sendmp->m_flags |= M_VLANTAG;
4512                         }
4513 #ifndef __NO_STRICT_ALIGNMENT
4514 skip:
4515 #endif
4516                         rxr->fmp = rxr->lmp = NULL;
4517                 }
4518 next_desc:
4519                 /* Zero out the receive descriptors status. */
4520                 cur->status = 0;
4521                 ++rxdone;       /* cumulative for POLL */
4522                 ++processed;
4523
4524                 /* Advance our pointers to the next descriptor. */
4525                 if (++i == adapter->num_rx_desc)
4526                         i = 0;
4527
4528                 /* Send to the stack */
4529                 if (sendmp != NULL) {
4530                         rxr->next_to_check = i;
4531                         EM_RX_UNLOCK(rxr);
4532                         (*ifp->if_input)(ifp, sendmp);
4533                         EM_RX_LOCK(rxr);
4534                         i = rxr->next_to_check;
4535                 }
4536
4537                 /* Refresh only every 8 descriptors, batching the RDT tail writes */
4538                 if (processed == 8) {
4539                         em_refresh_mbufs(rxr, i);
4540                         processed = 0;
4541                 }
4542         }
4543
4544         /* Catch any remaining refresh work */
4545         if (e1000_rx_unrefreshed(rxr))
4546                 em_refresh_mbufs(rxr, i);
4547
4548         rxr->next_to_check = i;
4549         if (done != NULL)
4550                 *done = rxdone;
4551         EM_RX_UNLOCK(rxr);
4552
4553         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4554 }
4555
4556 static __inline void
4557 em_rx_discard(struct rx_ring *rxr, int i)
4558 {
4559         struct em_buffer        *rbuf;
4560
4561         rbuf = &rxr->rx_buffers[i];
4562         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4563
4564         /* Free any previous pieces */
4565         if (rxr->fmp != NULL) {
4566                 rxr->fmp->m_flags |= M_PKTHDR;
4567                 m_freem(rxr->fmp);
4568                 rxr->fmp = NULL;
4569                 rxr->lmp = NULL;
4570         }
4571         /*
4572         ** Free the buffer and allow em_refresh_mbufs()
4573         ** to clean up and recharge it.
4574         */
4575         if (rbuf->m_head) {
4576                 m_free(rbuf->m_head);
4577                 rbuf->m_head = NULL;
4578         }
4579         return;
4580 }
4581
4582 #ifndef __NO_STRICT_ALIGNMENT
4583 /*
4584  * When jumbo frames are enabled we should realign the entire payload on
4585  * architectures with strict alignment. This is a serious design mistake of
4586  * the 8254x, as it nullifies the zero-copy benefit of DMA: the 8254x only
4587  * allows RX buffer sizes of 2048/4096/8192/16384, while what we really want
4588  * is 2048 - ETHER_ALIGN so the payload would land aligned. On architectures
4589  * without strict alignment the 8254x still performs unaligned memory
4590  * accesses, which reduce performance as well. To avoid copying an entire
4591  * frame just to align it, we allocate a new mbuf, copy the ethernet header
4592  * into it, and prepend the new mbuf to the existing mbuf chain.
4593  *
4594  * Be aware that the best performance of the 8254x is achieved only when
4595  * jumbo frames are not used at all on architectures with strict alignment.
4596  */
4597 static int
4598 em_fixup_rx(struct rx_ring *rxr)
4599 {
4600         struct adapter *adapter = rxr->adapter;
4601         struct mbuf *m, *n;
4602         int error;
4603
4604         error = 0;
4605         m = rxr->fmp;
4606         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4607                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4608                 m->m_data += ETHER_HDR_LEN;
4609         } else {
4610                 MGETHDR(n, M_NOWAIT, MT_DATA);
4611                 if (n != NULL) {
4612                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4613                         m->m_data += ETHER_HDR_LEN;
4614                         m->m_len -= ETHER_HDR_LEN;
4615                         n->m_len = ETHER_HDR_LEN;
4616                         M_MOVE_PKTHDR(n, m);
4617                         n->m_next = m;
4618                         rxr->fmp = n;
4619                 } else {
4620                         adapter->dropped_pkts++;
4621                         m_freem(rxr->fmp);
4622                         rxr->fmp = NULL;
4623                         error = ENOMEM;
4624                 }
4625         }
4626
4627         return (error);
4628 }
4629 #endif
4630
4631 /*********************************************************************
4632  *
4633  *  Verify that the hardware indicated that the checksum is valid.
4634  *  Inform the stack about the status of checksum so that stack
4635  *  doesn't spend time verifying the checksum.
4636  *
4637  *********************************************************************/
4638 static void
4639 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4640 {
4641         mp->m_pkthdr.csum_flags = 0;
4642
4643         /* Ignore Checksum bit is set */
4644         if (rx_desc->status & E1000_RXD_STAT_IXSM)
4645                 return;
4646
4647         if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4648                 return;
4649
4650         /* IP Checksum Good? */
4651         if (rx_desc->status & E1000_RXD_STAT_IPCS)
4652                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4653
4654         /* TCP or UDP checksum */
4655         if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4656                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4657                 mp->m_pkthdr.csum_data = htons(0xffff);
4658         }
4659 }
4660
4661 /*
4662  * This routine is run via a vlan
4663  * config EVENT
4664  */
4665 static void
4666 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4667 {
4668         struct adapter  *adapter = ifp->if_softc;
4669         u32             index, bit;
4670
4671         if (ifp->if_softc != arg)   /* Not our event */
4672                 return;
4673
4674         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4675                 return;
4676
4677         EM_CORE_LOCK(adapter);
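        /*
        ** The VFTA is a 4096-bit table stored as 128 32-bit words;
        ** e.g. vtag 100 maps to index 3 (100 >> 5), bit 4 (100 & 0x1F).
        */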
4678         index = (vtag >> 5) & 0x7F;
4679         bit = vtag & 0x1F;
4680         adapter->shadow_vfta[index] |= (1 << bit);
4681         ++adapter->num_vlans;
4682         /* Re-init to load the changes */
4683         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4684                 em_init_locked(adapter);
4685         EM_CORE_UNLOCK(adapter);
4686 }
4687
4688 /*
4689  * This routine is run via a vlan
4690  * unconfig EVENT
4691  */
4692 static void
4693 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4694 {
4695         struct adapter  *adapter = ifp->if_softc;
4696         u32             index, bit;
4697
4698         if (ifp->if_softc != arg)
4699                 return;
4700
4701         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4702                 return;
4703
4704         EM_CORE_LOCK(adapter);
4705         index = (vtag >> 5) & 0x7F;
4706         bit = vtag & 0x1F;
4707         adapter->shadow_vfta[index] &= ~(1 << bit);
4708         --adapter->num_vlans;
4709         /* Re-init to load the changes */
4710         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4711                 em_init_locked(adapter);
4712         EM_CORE_UNLOCK(adapter);
4713 }
4714
4715 static void
4716 em_setup_vlan_hw_support(struct adapter *adapter)
4717 {
4718         struct e1000_hw *hw = &adapter->hw;
4719         u32             reg;
4720
4721         /*
4722         ** We get here through init_locked, meaning
4723         ** a soft reset has already cleared the VFTA
4724         ** and other state, so if no vlans have been
4725         ** registered there is nothing to do.
4726         */
4727         if (adapter->num_vlans == 0)
4728                 return;
4729
4730         /*
4731         ** A soft reset zeroes out the VFTA, so
4732         ** we need to repopulate it now.
4733         */
4734         for (int i = 0; i < EM_VFTA_SIZE; i++)
4735                 if (adapter->shadow_vfta[i] != 0)
4736                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4737                             i, adapter->shadow_vfta[i]);
4738
4739         reg = E1000_READ_REG(hw, E1000_CTRL);
4740         reg |= E1000_CTRL_VME;
4741         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4742
4743         /* Enable the Filter Table */
4744         reg = E1000_READ_REG(hw, E1000_RCTL);
4745         reg &= ~E1000_RCTL_CFIEN;
4746         reg |= E1000_RCTL_VFE;
4747         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4748 }
4749
4750 static void
4751 em_enable_intr(struct adapter *adapter)
4752 {
4753         struct e1000_hw *hw = &adapter->hw;
4754         u32 ims_mask = IMS_ENABLE_MASK;
4755
4756         if (hw->mac.type == e1000_82574) {
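                /*
                ** Bits set in EIAC auto-clear in ICR when the
                ** corresponding MSIX vector fires, so the handlers
                ** need not acknowledge them explicitly.
                */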
4757                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4758                 ims_mask |= EM_MSIX_MASK;
4759         } 
4760         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4761 }
4762
4763 static void
4764 em_disable_intr(struct adapter *adapter)
4765 {
4766         struct e1000_hw *hw = &adapter->hw;
4767
4768         if (hw->mac.type == e1000_82574)
4769                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4770         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4771 }
4772
4773 /*
4774  * Bit of a misnomer, what this really means is
4775  * to enable OS management of the system... aka
4776  * to disable special hardware management features 
4777  */
4778 static void
4779 em_init_manageability(struct adapter *adapter)
4780 {
4781         /* A shared code workaround */
4782 #define E1000_82542_MANC2H E1000_MANC2H
4783         if (adapter->has_manage) {
4784                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4785                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4786
4787                 /* disable hardware interception of ARP */
4788                 manc &= ~(E1000_MANC_ARP_EN);
4789
4790                 /* enable receiving management packets to the host */
4791                 manc |= E1000_MANC_EN_MNG2HOST;
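                /*
                ** Port 623 is the standard ASF/RMCP management port
                ** and 664 the secure RMCP port; copies of packets to
                ** these ports are passed up to the host.
                */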
4792 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4793 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4794                 manc2h |= E1000_MNG2HOST_PORT_623;
4795                 manc2h |= E1000_MNG2HOST_PORT_664;
4796                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4797                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4798         }
4799 }
4800
4801 /*
4802  * Give control back to hardware management
4803  * controller if there is one.
4804  */
4805 static void
4806 em_release_manageability(struct adapter *adapter)
4807 {
4808         if (adapter->has_manage) {
4809                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4810
4811                 /* re-enable hardware interception of ARP */
4812                 manc |= E1000_MANC_ARP_EN;
4813                 manc &= ~E1000_MANC_EN_MNG2HOST;
4814
4815                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4816         }
4817 }
4818
4819 /*
4820  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4821  * For ASF and Pass Through versions of f/w this means
4822  * that the driver is loaded. For AMT-type f/w
4823  * this means that the network i/f is open.
4824  */
4825 static void
4826 em_get_hw_control(struct adapter *adapter)
4827 {
4828         u32 ctrl_ext, swsm;
4829
4830         if (adapter->hw.mac.type == e1000_82573) {
4831                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4832                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4833                     swsm | E1000_SWSM_DRV_LOAD);
4834                 return;
4835         }
4836         /* else */
4837         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4838         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4839             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4840         return;
4841 }
4842
4843 /*
4844  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4845  * For ASF and Pass Through versions of f/w this means that
4846  * the driver is no longer loaded. For AMT versions of the
4847  * f/w this means that the network i/f is closed.
4848  */
4849 static void
4850 em_release_hw_control(struct adapter *adapter)
4851 {
4852         u32 ctrl_ext, swsm;
4853
4854         if (!adapter->has_manage)
4855                 return;
4856
4857         if (adapter->hw.mac.type == e1000_82573) {
4858                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4859                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4860                     swsm & ~E1000_SWSM_DRV_LOAD);
4861                 return;
4862         }
4863         /* else */
4864         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4865         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4866             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4867         return;
4868 }
4869
4870 static int
4871 em_is_valid_ether_addr(u8 *addr)
4872 {
4873         char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
4874
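        /*
        ** Reject group (multicast/broadcast) addresses, flagged by the
        ** low-order bit of the first octet, and the all-zero address.
        */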
4875         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4876                 return (FALSE);
4877         }
4878
4879         return (TRUE);
4880 }
4881
4882 /*
4883 ** Parse the interface capabilities with regard
4884 ** to both system management and wake-on-lan for
4885 ** later use.
4886 */
4887 static void
4888 em_get_wakeup(device_t dev)
4889 {
4890         struct adapter  *adapter = device_get_softc(dev);
4891         u16             eeprom_data = 0, device_id, apme_mask;
4892
4893         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4894         apme_mask = EM_EEPROM_APME;
4895
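        /*
        ** The APME bit records whether the NVM enables wake by
        ** default; where it lives varies by MAC type, hence the
        ** switch below.
        */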
4896         switch (adapter->hw.mac.type) {
4897         case e1000_82573:
4898         case e1000_82583:
4899                 adapter->has_amt = TRUE;
4900                 /* Falls thru */
4901         case e1000_82571:
4902         case e1000_82572:
4903         case e1000_80003es2lan:
4904                 if (adapter->hw.bus.func == 1)
4905                         e1000_read_nvm(&adapter->hw,
4906                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4907                 else
4908                         e1000_read_nvm(&adapter->hw,
4909                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4910                 break;
4912         case e1000_ich8lan:
4913         case e1000_ich9lan:
4914         case e1000_ich10lan:
4915         case e1000_pchlan:
4916         case e1000_pch2lan:
4917                 apme_mask = E1000_WUC_APME;
4918                 adapter->has_amt = TRUE;
4919                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4920                 break;
4921         default:
4922                 e1000_read_nvm(&adapter->hw,
4923                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4924                 break;
4925         }
4926         if (eeprom_data & apme_mask)
4927                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4928         /*
4929          * We have the eeprom settings, now apply the special cases
4930          * where the eeprom may be wrong or the board won't support
4931          * wake on lan on a particular port
4932          */
4933         device_id = pci_get_device(dev);
4934         switch (device_id) {
4935         case E1000_DEV_ID_82571EB_FIBER:
4936                 /* Wake events only supported on port A for dual fiber
4937                  * regardless of eeprom setting */
4938                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4939                     E1000_STATUS_FUNC_1)
4940                         adapter->wol = 0;
4941                 break;
4942         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4943         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4944         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4945                 /* if quad port adapter, disable WoL on all but port A */
4946                 if (global_quad_port_a != 0)
4947                         adapter->wol = 0;
4948                 /* Reset for multiple quad port adapters */
4949                 if (++global_quad_port_a == 4)
4950                         global_quad_port_a = 0;
4951                 break;
4952         }
4953         return;
4954 }
4955
4956
4957 /*
4958  * Enable PCI Wake On Lan capability
4959  */
4960 static void
4961 em_enable_wakeup(device_t dev)
4962 {
4963         struct adapter  *adapter = device_get_softc(dev);
4964         struct ifnet    *ifp = adapter->ifp;
4965         u32             pmc, ctrl, ctrl_ext, rctl;
4966         u16             status;
4967
4968         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4969                 return;
4970
4971         /* Advertise the wakeup capability */
4972         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4973         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4974         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4975         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4976
4977         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4978             (adapter->hw.mac.type == e1000_pchlan) ||
4979             (adapter->hw.mac.type == e1000_ich9lan) ||
4980             (adapter->hw.mac.type == e1000_ich10lan))
4981                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4982
4983         /* Keep the laser running on Fiber adapters */
4984         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4985             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4986                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4987                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4988                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4989         }
4990
4991         /*
4992         ** Determine type of Wakeup: note that wol
4993         ** is set with all bits on by default.
4994         */
4995         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4996                 adapter->wol &= ~E1000_WUFC_MAG;
4997
4998         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4999                 adapter->wol &= ~E1000_WUFC_MC;
5000         else {
5001                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5002                 rctl |= E1000_RCTL_MPE;
5003                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5004         }
5005
5006         if ((adapter->hw.mac.type == e1000_pchlan) ||
5007             (adapter->hw.mac.type == e1000_pch2lan)) {
5008                 if (em_enable_phy_wakeup(adapter))
5009                         return;
5010         } else {
5011                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5012                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5013         }
5014
5015         if (adapter->hw.phy.type == e1000_phy_igp_3)
5016                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5017
5018         /* Request PME */
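        /*
        ** PME status is write-one-to-clear, so when WOL is enabled
        ** this both clears any stale PME event and sets PME_En to arm
        ** wake generation for the events selected above.
        */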
5019         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5020         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5021         if (ifp->if_capenable & IFCAP_WOL)
5022                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5023         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5024
5025         return;
5026 }
5027
5028 /*
5029 ** WOL on the newer chipset interfaces (pchlan)
5030 ** requires settings to be copied into the PHY
5031 */
5032 static int
5033 em_enable_phy_wakeup(struct adapter *adapter)
5034 {
5035         struct e1000_hw *hw = &adapter->hw;
5036         u32 mreg, ret = 0;
5037         u16 preg;
5038
5039         /* copy MAC RARs to PHY RARs */
5040         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5041
5042         /* copy MAC MTA to PHY MTA */
5043         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5044                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5045                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5046                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5047                     (u16)((mreg >> 16) & 0xFFFF));
5048         }
5049
5050         /* configure PHY Rx Control register */
5051         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5052         mreg = E1000_READ_REG(hw, E1000_RCTL);
5053         if (mreg & E1000_RCTL_UPE)
5054                 preg |= BM_RCTL_UPE;
5055         if (mreg & E1000_RCTL_MPE)
5056                 preg |= BM_RCTL_MPE;
5057         preg &= ~(BM_RCTL_MO_MASK);
5058         if (mreg & E1000_RCTL_MO_3)
5059                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5060                                 << BM_RCTL_MO_SHIFT);
5061         if (mreg & E1000_RCTL_BAM)
5062                 preg |= BM_RCTL_BAM;
5063         if (mreg & E1000_RCTL_PMCF)
5064                 preg |= BM_RCTL_PMCF;
5065         mreg = E1000_READ_REG(hw, E1000_CTRL);
5066         if (mreg & E1000_CTRL_RFCE)
5067                 preg |= BM_RCTL_RFCE;
5068         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5069
5070         /* enable PHY wakeup in MAC register */
5071         E1000_WRITE_REG(hw, E1000_WUC,
5072             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5073         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5074
5075         /* configure and enable PHY wakeup in PHY registers */
5076         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5077         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5078
5079         /* activate PHY wakeup */
5080         ret = hw->phy.ops.acquire(hw);
5081         if (ret) {
5082                 printf("Could not acquire PHY\n");
5083                 return (ret);
5084         }
5085         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5086                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5087         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5088         if (ret) {
5089                 printf("Could not read PHY page 769\n");
5090                 goto out;
5091         }
5092         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5093         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5094         if (ret)
5095                 printf("Could not set PHY Host Wakeup bit\n");
5096 out:
5097         hw->phy.ops.release(hw);
5098
5099         return (ret);
5100 }
5101
5102 static void
5103 em_led_func(void *arg, int onoff)
5104 {
5105         struct adapter  *adapter = arg;
5106  
5107         EM_CORE_LOCK(adapter);
5108         if (onoff) {
5109                 e1000_setup_led(&adapter->hw);
5110                 e1000_led_on(&adapter->hw);
5111         } else {
5112                 e1000_led_off(&adapter->hw);
5113                 e1000_cleanup_led(&adapter->hw);
5114         }
5115         EM_CORE_UNLOCK(adapter);
5116 }
5117
5118 /*
5119 ** Disable the L0S and L1 LINK states
5120 */
5121 static void
5122 em_disable_aspm(struct adapter *adapter)
5123 {
5124         int             base, reg;
5125         u16             link_cap, link_ctrl;
5126         device_t        dev = adapter->dev;
5127
5128         switch (adapter->hw.mac.type) {
5129                 case e1000_82573:
5130                 case e1000_82574:
5131                 case e1000_82583:
5132                         break;
5133                 default:
5134                         return;
5135         }
5136         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5137                 return;
5138         reg = base + PCIER_LINK_CAP;
5139         link_cap = pci_read_config(dev, reg, 2);
5140         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5141                 return;
5142         reg = base + PCIER_LINK_CTL;
5143         link_ctrl = pci_read_config(dev, reg, 2);
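        /*
        ** ASPMC is a two-bit field in Link Control: 01 enables L0s,
        ** 10 enables L1 and 11 both; clearing it disables both states.
        */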
5144         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5145         pci_write_config(dev, reg, link_ctrl, 2);
5146         return;
5147 }
5148
5149 /**********************************************************************
5150  *
5151  *  Update the board statistics counters.
5152  *
5153  **********************************************************************/
5154 static void
5155 em_update_stats_counters(struct adapter *adapter)
5156 {
5157         struct ifnet   *ifp;
5158
5159         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5160            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5161                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5162                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5163         }
5164         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5165         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5166         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5167         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5168
5169         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5170         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5171         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5172         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5173         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5174         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5175         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5176         /*
5177         ** For watchdog management we need to know if we have been
5178         ** paused during the last interval, so capture that here.
5179         */
5180         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5181         adapter->stats.xoffrxc += adapter->pause_frames;
5182         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5183         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5184         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5185         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5186         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5187         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5188         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5189         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5190         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5191         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5192         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5193         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5194
5195         /* For the 64-bit byte counters the low dword must be read first. */
5196         /* Both registers clear on the read of the high dword */
5197
5198         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5199             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5200         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5201             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5202
5203         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5204         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5205         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5206         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5207         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5208
5209         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5210         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5211
5212         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5213         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5214         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5215         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5216         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5217         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5218         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5219         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5220         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5221         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5222
5223         /* Interrupt Counts */
5224
5225         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5226         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5227         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5228         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5229         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5230         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5231         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5232         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5233         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5234
5235         if (adapter->hw.mac.type >= e1000_82543) {
5236                 adapter->stats.algnerrc +=
5237                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5238                 adapter->stats.rxerrc +=
5239                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5240                 adapter->stats.tncrs +=
5241                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5242                 adapter->stats.cexterr +=
5243                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5244                 adapter->stats.tsctc +=
5245                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5246                 adapter->stats.tsctfc +=
5247                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5248         }
5249         ifp = adapter->ifp;
5250
5251         ifp->if_collisions = adapter->stats.colc;
5252
5253         /* Rx Errors */
5254         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5255             adapter->stats.crcerrs + adapter->stats.algnerrc +
5256             adapter->stats.ruc + adapter->stats.roc +
5257             adapter->stats.mpc + adapter->stats.cexterr;
5258
5259         /* Tx Errors */
5260         ifp->if_oerrors = adapter->stats.ecol +
5261             adapter->stats.latecol + adapter->watchdog_events;
5262 }
5263
5264 /* Export a single 32-bit register via a read-only sysctl. */
5265 static int
5266 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5267 {
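        /*
        ** oid_arg1 carries the adapter softc and oid_arg2 the register
        ** offset, as wired up by the SYSCTL_ADD_PROC() calls in
        ** em_add_hw_stats() below.
        */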
5268         struct adapter *adapter;
5269         u_int val;
5270
5271         adapter = oidp->oid_arg1;
5272         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5273         return (sysctl_handle_int(oidp, &val, 0, req));
5274 }
5275
5276 /*
5277  * Add sysctl variables, one per statistic, to the system.
5278  */
5279 static void
5280 em_add_hw_stats(struct adapter *adapter)
5281 {
5282         device_t dev = adapter->dev;
5283
5284         struct tx_ring *txr = adapter->tx_rings;
5285         struct rx_ring *rxr = adapter->rx_rings;
5286
5287         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5288         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5289         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5290         struct e1000_hw_stats *stats = &adapter->stats;
5291
5292         struct sysctl_oid *stat_node, *queue_node, *int_node;
5293         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5294
5295 #define QUEUE_NAME_LEN 32
5296         char namebuf[QUEUE_NAME_LEN];
5297         
5298         /* Driver Statistics */
5299         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5300                         CTLFLAG_RD, &adapter->link_irq,
5301                         "Link MSIX IRQ Handled");
5302         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5303                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5304                          "Std mbuf failed");
5305         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5306                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5307                          "Std mbuf cluster failed");
5308         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5309                         CTLFLAG_RD, &adapter->dropped_pkts,
5310                         "Driver dropped packets");
5311         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5312                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5313                         "Driver tx dma failure in xmit");
5314         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5315                         CTLFLAG_RD, &adapter->rx_overruns,
5316                         "RX overruns");
5317         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5318                         CTLFLAG_RD, &adapter->watchdog_events,
5319                         "Watchdog timeouts");
5320         
5321         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5322                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5323                         em_sysctl_reg_handler, "IU",
5324                         "Device Control Register");
5325         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5326                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5327                         em_sysctl_reg_handler, "IU",
5328                         "Receiver Control Register");
5329         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5330                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5331                         "Flow Control High Watermark");
5332         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5333                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5334                         "Flow Control Low Watermark");
5335
5336         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5337                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5338                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5339                                             CTLFLAG_RD, NULL, "Queue Name");
5340                 queue_list = SYSCTL_CHILDREN(queue_node);
5341
5342                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5343                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5344                                 E1000_TDH(txr->me),
5345                                 em_sysctl_reg_handler, "IU",
5346                                 "Transmit Descriptor Head");
5347                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5348                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5349                                 E1000_TDT(txr->me),
5350                                 em_sysctl_reg_handler, "IU",
5351                                 "Transmit Descriptor Tail");
5352                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5353                                 CTLFLAG_RD, &txr->tx_irq,
5354                                 "Queue MSI-X Transmit Interrupts");
5355                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5356                                 CTLFLAG_RD, &txr->no_desc_avail,
5357                                 "Queue No Descriptor Available");
5358                 
5359                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5360                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5361                                 E1000_RDH(rxr->me),
5362                                 em_sysctl_reg_handler, "IU",
5363                                 "Receive Descriptor Head");
5364                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5365                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5366                                 E1000_RDT(rxr->me),
5367                                 em_sysctl_reg_handler, "IU",
5368                                 "Receive Descriptor Tail");
5369                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5370                                 CTLFLAG_RD, &rxr->rx_irq,
5371                                 "Queue MSI-X Receive Interrupts");
5372         }
5373
5374         /* MAC stats get their own sub node */
5375
5376         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5377                                     CTLFLAG_RD, NULL, "Statistics");
5378         stat_list = SYSCTL_CHILDREN(stat_node);
5379
5380         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5381                         CTLFLAG_RD, &stats->ecol,
5382                         "Excessive collisions");
5383         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5384                         CTLFLAG_RD, &stats->scc,
5385                         "Single collisions");
5386         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5387                         CTLFLAG_RD, &stats->mcc,
5388                         "Multiple collisions");
5389         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5390                         CTLFLAG_RD, &stats->latecol,
5391                         "Late collisions");
5392         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5393                         CTLFLAG_RD, &stats->colc,
5394                         "Collision Count");
5395         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5396                         CTLFLAG_RD, &adapter->stats.symerrs,
5397                         "Symbol Errors");
5398         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5399                         CTLFLAG_RD, &adapter->stats.sec,
5400                         "Sequence Errors");
5401         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5402                         CTLFLAG_RD, &adapter->stats.dc,
5403                         "Defer Count");
5404         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5405                         CTLFLAG_RD, &adapter->stats.mpc,
5406                         "Missed Packets");
5407         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5408                         CTLFLAG_RD, &adapter->stats.rnbc,
5409                         "Receive No Buffers");
5410         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5411                         CTLFLAG_RD, &adapter->stats.ruc,
5412                         "Receive Undersize");
5413         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5414                         CTLFLAG_RD, &adapter->stats.rfc,
5415                         "Fragmented Packets Received");
5416         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5417                         CTLFLAG_RD, &adapter->stats.roc,
5418                         "Oversized Packets Received");
5419         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5420                         CTLFLAG_RD, &adapter->stats.rjc,
5421                         "Received Jabber");
5422         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5423                         CTLFLAG_RD, &adapter->stats.rxerrc,
5424                         "Receive Errors");
5425         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5426                         CTLFLAG_RD, &adapter->stats.crcerrs,
5427                         "CRC errors");
5428         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5429                         CTLFLAG_RD, &adapter->stats.algnerrc,
5430                         "Alignment Errors");
5431         /* On 82575 these are collision counts */
5432         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5433                         CTLFLAG_RD, &adapter->stats.cexterr,
5434                         "Collision/Carrier extension errors");
5435         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5436                         CTLFLAG_RD, &adapter->stats.xonrxc,
5437                         "XON Received");
5438         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5439                         CTLFLAG_RD, &adapter->stats.xontxc,
5440                         "XON Transmitted");
5441         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5442                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5443                         "XOFF Received");
5444         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5445                         CTLFLAG_RD, &adapter->stats.xofftxc,
5446                         "XOFF Transmitted");
5447
5448         /* Packet Reception Stats */
5449         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5450                         CTLFLAG_RD, &adapter->stats.tpr,
5451                         "Total Packets Received");
5452         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5453                         CTLFLAG_RD, &adapter->stats.gprc,
5454                         "Good Packets Received");
5455         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5456                         CTLFLAG_RD, &adapter->stats.bprc,
5457                         "Broadcast Packets Received");
5458         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5459                         CTLFLAG_RD, &adapter->stats.mprc,
5460                         "Multicast Packets Received");
5461         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5462                         CTLFLAG_RD, &adapter->stats.prc64,
5463                         "64 byte frames received");
5464         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5465                         CTLFLAG_RD, &adapter->stats.prc127,
5466                         "65-127 byte frames received");
5467         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5468                         CTLFLAG_RD, &adapter->stats.prc255,
5469                         "128-255 byte frames received");
5470         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5471                         CTLFLAG_RD, &adapter->stats.prc511,
5472                         "256-511 byte frames received");
5473         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5474                         CTLFLAG_RD, &adapter->stats.prc1023,
5475                         "512-1023 byte frames received");
5476         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5477                         CTLFLAG_RD, &adapter->stats.prc1522,
5478                         "1024-1522 byte frames received");
5479         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5480                         CTLFLAG_RD, &adapter->stats.gorc, 
5481                         "Good Octets Received"); 
5482
5483         /* Packet Transmission Stats */
5484         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5485                         CTLFLAG_RD, &adapter->stats.gotc, 
5486                         "Good Octets Transmitted"); 
5487         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5488                         CTLFLAG_RD, &adapter->stats.tpt,
5489                         "Total Packets Transmitted");
5490         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5491                         CTLFLAG_RD, &adapter->stats.gptc,
5492                         "Good Packets Transmitted");
5493         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5494                         CTLFLAG_RD, &adapter->stats.bptc,
5495                         "Broadcast Packets Transmitted");
5496         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5497                         CTLFLAG_RD, &adapter->stats.mptc,
5498                         "Multicast Packets Transmitted");
5499         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5500                         CTLFLAG_RD, &adapter->stats.ptc64,
5501                         "64 byte frames transmitted");
5502         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5503                         CTLFLAG_RD, &adapter->stats.ptc127,
5504                         "65-127 byte frames transmitted");
5505         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5506                         CTLFLAG_RD, &adapter->stats.ptc255,
5507                         "128-255 byte frames transmitted");
5508         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5509                         CTLFLAG_RD, &adapter->stats.ptc511,
5510                         "256-511 byte frames transmitted");
5511         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5512                         CTLFLAG_RD, &adapter->stats.ptc1023,
5513                         "512-1023 byte frames transmitted");
5514         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5515                         CTLFLAG_RD, &adapter->stats.ptc1522,
5516                         "1024-1522 byte frames transmitted");
5517         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5518                         CTLFLAG_RD, &adapter->stats.tsctc,
5519                         "TSO Contexts Transmitted");
5520         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5521                         CTLFLAG_RD, &adapter->stats.tsctfc,
5522                         "TSO Contexts Failed");
5523
5524
5525         /* Interrupt Stats */
5526
5527         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5528                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5529         int_list = SYSCTL_CHILDREN(int_node);
5530
5531         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5532                         CTLFLAG_RD, &adapter->stats.iac,
5533                         "Interrupt Assertion Count");
5534
5535         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5536                         CTLFLAG_RD, &adapter->stats.icrxptc,
5537                         "Interrupt Cause Rx Pkt Timer Expire Count");
5538
5539         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5540                         CTLFLAG_RD, &adapter->stats.icrxatc,
5541                         "Interrupt Cause Rx Abs Timer Expire Count");
5542
5543         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5544                         CTLFLAG_RD, &adapter->stats.ictxptc,
5545                         "Interrupt Cause Tx Pkt Timer Expire Count");
5546
5547         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5548                         CTLFLAG_RD, &adapter->stats.ictxatc,
5549                         "Interrupt Cause Tx Abs Timer Expire Count");
5550
5551         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5552                         CTLFLAG_RD, &adapter->stats.ictxqec,
5553                         "Interrupt Cause Tx Queue Empty Count");
5554
5555         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5556                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5557                         "Interrupt Cause Tx Queue Min Thresh Count");
5558
5559         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5560                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5561                         "Interrupt Cause Rx Desc Min Thresh Count");
5562
5563         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5564                         CTLFLAG_RD, &adapter->stats.icrxoc,
5565                         "Interrupt Cause Receiver Overrun Count");
5566 }
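
     /*
     ** Usage sketch (unit number hypothetical): the nodes registered
     ** above land under the device's sysctl tree, so the MAC counter
     ** block and the interrupt counters can be read in one shot:
     **
     **   # sysctl dev.em.0.mac_stats
     **   # sysctl dev.em.0.interrupts.asserts
     */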
5567
5568 /**********************************************************************
5569  *
5570  *  This routine provides a way to dump out the adapter's EEPROM,
5571  *  often a useful debug/service tool.  Only the first 32 words
5572  *  are dumped; everything that matters lies within that extent.
5573  *
5574  **********************************************************************/
5575 static int
5576 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5577 {
5578         struct adapter *adapter = (struct adapter *)arg1;
5579         int error;
5580         int result;
5581
5582         result = -1;
5583         error = sysctl_handle_int(oidp, &result, 0, req);
5584
5585         if (error || !req->newptr)
5586                 return (error);
5587
5588         /*
5589          * This value will cause a hex dump of the
5590          * first 32 16-bit words of the EEPROM to
5591          * the screen.
5592          */
5593         if (result == 1)
5594                 em_print_nvm_info(adapter);
5595
5596         return (error);
5597 }
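
     /*
     ** Usage sketch: this handler is assumed to be attached elsewhere
     ** in the file to a read/write node named "nvm"; writing 1 dumps
     ** the EEPROM to the console (unit number hypothetical):
     **
     **   # sysctl dev.em.0.nvm=1
     */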
5598
5599 static void
5600 em_print_nvm_info(struct adapter *adapter)
5601 {
5602         u16     eeprom_data;
5603         int     i, j, row = 0;
5604
5605         /* It's a bit crude, but it gets the job done */
5606         printf("\nInterface EEPROM Dump:\n");
5607         printf("Offset\n0x0000  ");
5608         for (i = 0, j = 0; i < 32; i++, j++) {
5609                 if (j == 8) { /* Start a new row, labeled by byte offset */
5610                         j = 0; ++row;
5611                         printf("\n0x00%x0  ", row);
5612                 }
5613                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5614                 printf("%04x ", eeprom_data);
5615         }
5616         printf("\n");
5617 }
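
     /*
     ** The dump is four rows of eight 16-bit words, each row labeled
     ** with its starting byte offset ("xxxx" stands for word values):
     **
     **   Offset
     **   0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
     **   0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
     **   ...
     */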
5618
5619 static int
5620 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5621 {
5622         struct em_int_delay_info *info;
5623         struct adapter *adapter;
5624         u32 regval;
5625         int error, usecs, ticks;
5626
5627         info = (struct em_int_delay_info *)arg1;
5628         usecs = info->value;
5629         error = sysctl_handle_int(oidp, &usecs, 0, req);
5630         if (error != 0 || req->newptr == NULL)
5631                 return (error);
5632         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5633                 return (EINVAL);
5634         info->value = usecs;
5635         ticks = EM_USECS_TO_TICKS(usecs);
5636         if (info->offset == E1000_ITR)  /* ITR units are 256ns: 4 per 1.024us tick */
5637                 ticks *= 4;
5638
5639         adapter = info->adapter;
5640         
5641         EM_CORE_LOCK(adapter);
5642         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5643         regval = (regval & ~0xffff) | (ticks & 0xffff);
5644         /* Handle a few special cases. */
5645         switch (info->offset) {
5646         case E1000_RDTR:
5647                 break;
5648         case E1000_TIDV:
5649                 if (ticks == 0) {
5650                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5651                         /* Don't write 0 into the TIDV register. */
5652                         regval++;
5653                 } else
5654                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5655                 break;
5656         }
5657         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5658         EM_CORE_UNLOCK(adapter);
5659         return (0);
5660 }
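
     /*
     ** Worked example of the conversion above, assuming
     ** EM_USECS_TO_TICKS rounds microseconds into the 1.024us units
     ** these registers use: a request of 128 usecs becomes
     ** 128 / 1.024 = 125 ticks.  E1000_ITR instead counts 256ns
     ** increments, so its tick count is multiplied by 4
     ** (1.024us / 256ns), giving 500.
     */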
5661
5662 static void
5663 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5664         const char *description, struct em_int_delay_info *info,
5665         int offset, int value)
5666 {
5667         info->adapter = adapter;
5668         info->offset = offset;
5669         info->value = value;
5670         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5671             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5672             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5673             info, 0, em_sysctl_int_delay, "I", description);
5674 }
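
     /*
     ** A hedged call sketch; the OID name, description, info field
     ** and default value here are illustrative, not the driver's
     ** actual registrations:
     **
     **   em_add_int_delay_sysctl(adapter, "rx_int_delay",
     **       "receive interrupt delay in usecs",
     **       &adapter->rx_int_delay, E1000_RDTR, 0);
     */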
5675
5676 static void
5677 em_set_sysctl_value(struct adapter *adapter, const char *name,
5678         const char *description, int *limit, int value)
5679 {
5680         *limit = value;
5681         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5682             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5683             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5684 }
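
     /*
     ** A hedged call sketch (name, description and limit field are
     ** illustrative): this wrapper both seeds the variable and
     ** exposes it as a plain read/write integer:
     **
     **   em_set_sysctl_value(adapter, "rx_processing_limit",
     **       "max rx packets to process per call",
     **       &adapter->rx_process_limit, 100);
     */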
5685
5686
5687 /*
5688 ** Set flow control using sysctl:
5689 ** Flow control values:
5690 **      0 - off
5691 **      1 - rx pause
5692 **      2 - tx pause
5693 **      3 - full
5694 */
5695 static int
5696 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5697 {       
5698         int             error;
5699         static int      input = 3; /* default full; NB: static, so it retains the last request across calls */
5700         struct adapter  *adapter = (struct adapter *) arg1;
5701                     
5702         error = sysctl_handle_int(oidp, &input, 0, req);
5703     
5704         if ((error) || (req->newptr == NULL))
5705                 return (error);
5706                 
5707         if (input == adapter->fc) /* no change? */
5708                 return (error);
5709
5710         switch (input) {
5711                 case e1000_fc_rx_pause:
5712                 case e1000_fc_tx_pause:
5713                 case e1000_fc_full:
5714                 case e1000_fc_none:
5715                         adapter->hw.fc.requested_mode = input;
5716                         adapter->fc = input;
5717                         break;
5718                 default:
5719                         /* Reject values outside the 0-3 range */
5720                         return (EINVAL);
5721         }
5722
5723         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5724         e1000_force_mac_fc(&adapter->hw);
5725         return (error);
5726 }
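
     /*
     ** Usage sketch (handler assumed attached to a node named "fc";
     ** unit number hypothetical).  The values follow the e1000_fc_*
     ** enum above, so 3 requests full rx+tx pause:
     **
     **   # sysctl dev.em.0.fc=3
     */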
5727
5728 /*
5729 ** Manage Energy Efficient Ethernet:
5730 ** Control values:
5731 **     0 - enabled, 1 - disabled
5732 */
5733 static int
5734 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5735 {
5736         struct adapter *adapter = (struct adapter *) arg1;
5737         int             error, value;
5738
5739         value = adapter->hw.dev_spec.ich8lan.eee_disable;
5740         error = sysctl_handle_int(oidp, &value, 0, req);
5741         if (error || req->newptr == NULL)
5742                 return (error);
5743         EM_CORE_LOCK(adapter);
5744         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5745         em_init_locked(adapter);
5746         EM_CORE_UNLOCK(adapter);
5747         return (0);
5748 }
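
     /*
     ** Usage sketch (node name "eee_control" is an assumption; unit
     ** hypothetical).  The value feeds eee_disable, so writing 1
     ** turns EEE off and reinitializes the interface:
     **
     **   # sysctl dev.em.0.eee_control=1
     */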
5749
5750 static int
5751 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5752 {
5753         struct adapter *adapter;
5754         int error;
5755         int result;
5756
5757         result = -1;
5758         error = sysctl_handle_int(oidp, &result, 0, req);
5759
5760         if (error || !req->newptr)
5761                 return (error);
5762
5763         if (result == 1) {
5764                 adapter = (struct adapter *)arg1;
5765                 em_print_debug_info(adapter);
5766         }
5767
5768         return (error);
5769 }
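
     /*
     ** Usage sketch (node name "debug" is an assumption): writing 1
     ** prints the state dumped by em_print_debug_info() below:
     **
     **   # sysctl dev.em.0.debug=1
     */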
5770
5771 /*
5772 ** This routine is meant to be fluid; add whatever is
5773 ** needed for debugging a problem.  -jfv
5774 */
5775 static void
5776 em_print_debug_info(struct adapter *adapter)
5777 {
5778         device_t dev = adapter->dev;
5779         struct tx_ring *txr = adapter->tx_rings;
5780         struct rx_ring *rxr = adapter->rx_rings;
5781
5782         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5783                 printf("Interface is RUNNING ");
5784         else
5785                 printf("Interface is NOT RUNNING\n");
5786
5787         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE) /* tx queue full */
5788                 printf("and INACTIVE\n");
5789         else
5790                 printf("and ACTIVE\n");
5791
5792         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5793             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5794             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5795         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5796             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5797             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5798         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5799         device_printf(dev, "TX descriptors avail = %d\n",
5800             txr->tx_avail);
5801         device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5802             txr->no_desc_avail);
5803         device_printf(dev, "RX discarded packets = %ld\n",
5804             rxr->rx_discarded);
5805         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5806         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5807 }