/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
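
/*
 * Note: the interrupt delay timers (TIDV/RDTR/TADV/RADV) tick in
 * 1.024 usec units, so EM_TICKS_TO_USECS()/EM_USECS_TO_TICKS()
 * convert with rounding, e.g. EM_TICKS_TO_USECS(64) =
 * (1024 * 64 + 500) / 1000 = 66 usecs.  The ITR register counts in
 * 256 nsec units, so DEFAULT_ITR = 10^9 / (8000 * 256) = 488, which
 * corresponds to roughly MAX_INTS_PER_SEC (8000) interrupts/sec.
 */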

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF (the value is copied
** into hw->dev_spec.ich8lan.eee_disable, so 1 means disabled) */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");
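
/*
 * All of the hw.em.* knobs above are boot-time loader tunables
 * (CTLFLAG_RDTUN); a /boot/loader.conf sketch, with values that are
 * purely illustrative rather than recommendations, might be:
 *
 *      hw.em.txd="2048"
 *      hw.em.rxd="2048"
 *      hw.em.rx_process_limit="200"
 */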

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");
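
        /*
         * The "fc" handler attaches under the per-device tree, i.e.
         * dev.em.<unit>.fc.  Assuming the usual e1000 flow control
         * enum (0 = none, 1 = rx pause, 2 = tx pause, 3 = full),
         * requesting full flow control would be: sysctl dev.em.0.fc=3
         */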

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.
         * It must not exceed the hardware maximum, and the ring size
         * in bytes must be a multiple of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;
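
        /*
         * (Both legacy descriptor formats are 16 bytes, so with
         * EM_DBA_ALIGN at 128 the alignment checks above effectively
         * require ring sizes that are a multiple of 8 descriptors.)
         */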

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reading the nvm and
        ** mac address from it.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit;
 *  if the ring is busy the driver can queue the request rather
 *  than do an immediate send.  That buffering, rather than having
 *  multiple hardware tx queues, is where this driver gains its
 *  advantage.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m != NULL) {
                err = drbr_enqueue(ifp, txr->br, m);
                if (err)
                        return (err);
        }

        /* Process the queue */
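        /*
         * drbr_peek() leaves each mbuf on the ring while em_xmit()
         * runs; on failure we either advance past it (em_xmit
         * consumed the mbuf) or put it back for a later retry.
         */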
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL)
                                drbr_advance(ifp, txr->br);
                        else
                                drbr_putback(ifp, txr->br, next);
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }
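
        /*
         * Reclaim descriptors when running low; if a maximally
         * scattered frame (EM_MAX_SCATTER segments) still would not
         * fit, stall the stack by setting IFF_DRV_OACTIVE.
         */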
        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
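                /*
                 * The check below is in terms of payload MTU, e.g. a
                 * 9234-byte hardware frame limit leaves room for an
                 * MTU of up to 9234 - 14 (header) - 4 (CRC) = 9216.
                 */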
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
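                /*
                 * Each set bit in mask marks a capability whose
                 * requested state differs from its current one; the
                 * handlers below toggle those capabilities and, where
                 * the hardware must be reprogrammed, schedule a reinit.
                 */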
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset; we make a duplicate
         * in RAR[14] for that eventuality, which assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
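        /* (VET tells the MAC which EtherType, 0x8100, marks a VLAN tag) */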

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->hw.mac.max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->hw.mac.max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
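
        /*
         * (MCLBYTES is a 2K cluster, MJUMPAGESIZE a page-sized one,
         * 4K on most platforms, and MJUM9BYTES a 9K cluster; the
         * smallest size that holds a full frame is chosen.)
         */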
1350
1351         /* Prepare receive descriptors and buffers */
1352         if (em_setup_receive_structures(adapter)) {
1353                 device_printf(dev, "Could not setup receive structures\n");
1354                 em_stop(adapter);
1355                 return;
1356         }
1357         em_initialize_receive_unit(adapter);
1358
1359         /* Use real VLAN Filter support? */
1360         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1361                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1362                         /* Use real VLAN Filter support */
1363                         em_setup_vlan_hw_support(adapter);
1364                 else {
1365                         u32 ctrl;
1366                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1367                         ctrl |= E1000_CTRL_VME;
1368                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1369                 }
1370         }
1371
1372         /* Don't lose promiscuous settings */
1373         em_set_promisc(adapter);
1374
1375         /* Set the interface as ACTIVE */
1376         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1377         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1378
1379         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1380         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1381
1382         /* MSI/X configuration for 82574 */
1383         if (adapter->hw.mac.type == e1000_82574) {
1384                 int tmp;
1385                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1386                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1387                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1388                 /* Set the IVAR - interrupt vector routing. */
1389                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1390         }
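        /* (adapter->ivars was assembled earlier in em_allocate_msix()) */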
1391
1392 #ifdef DEVICE_POLLING
1393         /*
1394          * Only enable interrupts if we are not polling; make sure
1395          * they are off otherwise.
1396          */
1397         if (ifp->if_capenable & IFCAP_POLLING)
1398                 em_disable_intr(adapter);
1399         else
1400 #endif /* DEVICE_POLLING */
1401                 em_enable_intr(adapter);
1402
1403         /* On AMT-capable hardware the driver now takes control from firmware */
1404         if (adapter->has_manage && adapter->has_amt)
1405                 em_get_hw_control(adapter);
1406 }
1407
1408 static void
1409 em_init(void *arg)
1410 {
1411         struct adapter *adapter = arg;
1412
1413         EM_CORE_LOCK(adapter);
1414         em_init_locked(adapter);
1415         EM_CORE_UNLOCK(adapter);
1416 }
1417
1418
1419 #ifdef DEVICE_POLLING
1420 /*********************************************************************
1421  *
1422  *  Legacy polling routine: note this only works with a single queue
1423  *
1424  *********************************************************************/
1425 static int
1426 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1427 {
1428         struct adapter *adapter = ifp->if_softc;
1429         struct tx_ring  *txr = adapter->tx_rings;
1430         struct rx_ring  *rxr = adapter->rx_rings;
1431         u32             reg_icr;
1432         int             rx_done;
1433
1434         EM_CORE_LOCK(adapter);
1435         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1436                 EM_CORE_UNLOCK(adapter);
1437                 return (0);
1438         }
1439
1440         if (cmd == POLL_AND_CHECK_STATUS) {
1441                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1442                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1443                         callout_stop(&adapter->timer);
1444                         adapter->hw.mac.get_link_status = 1;
1445                         em_update_link_status(adapter);
1446                         callout_reset(&adapter->timer, hz,
1447                             em_local_timer, adapter);
1448                 }
1449         }
1450         EM_CORE_UNLOCK(adapter);
1451
1452         em_rxeof(rxr, count, &rx_done);
1453
1454         EM_TX_LOCK(txr);
1455         em_txeof(txr);
1456 #ifdef EM_MULTIQUEUE
1457         if (!drbr_empty(ifp, txr->br))
1458                 em_mq_start_locked(ifp, txr, NULL);
1459 #else
1460         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1461                 em_start_locked(ifp, txr);
1462 #endif
1463         EM_TX_UNLOCK(txr);
1464
1465         return (rx_done);
1466 }
1467 #endif /* DEVICE_POLLING */
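
/*
** em_poll() is compiled only with "options DEVICE_POLLING" and is
** engaged per interface through the polling capability, e.g.
** "ifconfig em0 polling".
*/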
1468
1469
1470 /*********************************************************************
1471  *
1472  *  Fast Legacy/MSI Combined Interrupt Service routine  
1473  *
1474  *********************************************************************/
1475 static int
1476 em_irq_fast(void *arg)
1477 {
1478         struct adapter  *adapter = arg;
1479         struct ifnet    *ifp;
1480         u32             reg_icr;
1481
1482         ifp = adapter->ifp;
1483
1484         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485
1486         /* Hot eject?  */
1487         if (reg_icr == 0xffffffff)
1488                 return FILTER_STRAY;
1489
1490         /* Definitely not our interrupt.  */
1491         if (reg_icr == 0x0)
1492                 return FILTER_STRAY;
1493
1494         /*
1495          * Starting with the 82571 chip, bit 31 should be used to
1496          * determine whether the interrupt belongs to us.
1497          */
1498         if (adapter->hw.mac.type >= e1000_82571 &&
1499             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1500                 return FILTER_STRAY;
1501
1502         em_disable_intr(adapter);
1503         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1504
1505         /* Link status change */
1506         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1507                 adapter->hw.mac.get_link_status = 1;
1508                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1509         }
1510
1511         if (reg_icr & E1000_ICR_RXO)
1512                 adapter->rx_overruns++;
1513         return FILTER_HANDLED;
1514 }
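
/*
** em_irq_fast() runs as an interrupt filter: it must not sleep or
** take sleepable locks, so it simply masks interrupts and defers
** the real work to the taskqueues drained by the handlers below.
*/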
1515
1516 /* Combined RX/TX handler, used by Legacy and MSI */
1517 static void
1518 em_handle_que(void *context, int pending)
1519 {
1520         struct adapter  *adapter = context;
1521         struct ifnet    *ifp = adapter->ifp;
1522         struct tx_ring  *txr = adapter->tx_rings;
1523         struct rx_ring  *rxr = adapter->rx_rings;
1524
1525
1526         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1527                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1528                 EM_TX_LOCK(txr);
1529                 em_txeof(txr);
1530 #ifdef EM_MULTIQUEUE
1531                 if (!drbr_empty(ifp, txr->br))
1532                         em_mq_start_locked(ifp, txr, NULL);
1533 #else
1534                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1535                         em_start_locked(ifp, txr);
1536 #endif
1537                 EM_TX_UNLOCK(txr);
1538                 if (more) {
1539                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1540                         return;
1541                 }
1542         }
1543
1544         em_enable_intr(adapter);
1545         return;
1546 }
1547
1548
1549 /*********************************************************************
1550  *
1551  *  MSIX Interrupt Service Routines
1552  *
1553  **********************************************************************/
1554 static void
1555 em_msix_tx(void *arg)
1556 {
1557         struct tx_ring *txr = arg;
1558         struct adapter *adapter = txr->adapter;
1559         struct ifnet    *ifp = adapter->ifp;
1560
1561         ++txr->tx_irq;
1562         EM_TX_LOCK(txr);
1563         em_txeof(txr);
1564 #ifdef EM_MULTIQUEUE
1565         if (!drbr_empty(ifp, txr->br))
1566                 em_mq_start_locked(ifp, txr, NULL);
1567 #else
1568         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1569                 em_start_locked(ifp, txr);
1570 #endif
1571         /* Reenable this interrupt */
1572         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1573         EM_TX_UNLOCK(txr);
1574         return;
1575 }
1576
1577 /*********************************************************************
1578  *
1579  *  MSIX RX Interrupt Service routine
1580  *
1581  **********************************************************************/
1582
1583 static void
1584 em_msix_rx(void *arg)
1585 {
1586         struct rx_ring  *rxr = arg;
1587         struct adapter  *adapter = rxr->adapter;
1588         bool            more;
1589
1590         ++rxr->rx_irq;
1591         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1592                 return;
1593         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1594         if (more)
1595                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1596         else
1597                 /* Reenable this interrupt */
1598                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1599         return;
1600 }
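
/*
** Note the rearm pattern above: while em_rxeof() reports more work,
** the handler reschedules itself on the taskqueue; the IMS bit for
** this vector is only rewritten once the ring has been drained.
*/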
1601
1602 /*********************************************************************
1603  *
1604  *  MSIX Link Fast Interrupt Service routine
1605  *
1606  **********************************************************************/
1607 static void
1608 em_msix_link(void *arg)
1609 {
1610         struct adapter  *adapter = arg;
1611         u32             reg_icr;
1612
1613         ++adapter->link_irq;
1614         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1615
1616         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1617                 adapter->hw.mac.get_link_status = 1;
1618                 em_handle_link(adapter, 0);
1619         } else
1620                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1621                     EM_MSIX_LINK | E1000_IMS_LSC);
1622         return;
1623 }
1624
1625 static void
1626 em_handle_rx(void *context, int pending)
1627 {
1628         struct rx_ring  *rxr = context;
1629         struct adapter  *adapter = rxr->adapter;
1630         bool            more;
1631
1632         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1633         if (more)
1634                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1635         else
1636                 /* Reenable this interrupt */
1637                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1638 }
1639
1640 static void
1641 em_handle_tx(void *context, int pending)
1642 {
1643         struct tx_ring  *txr = context;
1644         struct adapter  *adapter = txr->adapter;
1645         struct ifnet    *ifp = adapter->ifp;
1646
1647         EM_TX_LOCK(txr);
1648         em_txeof(txr);
1649 #ifdef EM_MULTIQUEUE
1650         if (!drbr_empty(ifp, txr->br))
1651                 em_mq_start_locked(ifp, txr, NULL);
1652 #else
1653         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1654                 em_start_locked(ifp, txr);
1655 #endif
1656         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1657         EM_TX_UNLOCK(txr);
1658 }
1659
1660 static void
1661 em_handle_link(void *context, int pending)
1662 {
1663         struct adapter  *adapter = context;
1664         struct tx_ring  *txr = adapter->tx_rings;
1665         struct ifnet *ifp = adapter->ifp;
1666
1667         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1668                 return;
1669
1670         EM_CORE_LOCK(adapter);
1671         callout_stop(&adapter->timer);
1672         em_update_link_status(adapter);
1673         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1674         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1675             EM_MSIX_LINK | E1000_IMS_LSC);
1676         if (adapter->link_active) {
1677                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1678                         EM_TX_LOCK(txr);
1679 #ifdef EM_MULTIQUEUE
1680                         if (!drbr_empty(ifp, txr->br))
1681                                 em_mq_start_locked(ifp, txr, NULL);
1682 #else
1683                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1684                                 em_start_locked(ifp, txr);
1685 #endif
1686                         EM_TX_UNLOCK(txr);
1687                 }
1688         }
1689         EM_CORE_UNLOCK(adapter);
1690 }
1691
1692
1693 /*********************************************************************
1694  *
1695  *  Media Ioctl callback
1696  *
1697  *  This routine is called whenever the user queries the status of
1698  *  the interface using ifconfig.
1699  *
1700  **********************************************************************/
1701 static void
1702 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1703 {
1704         struct adapter *adapter = ifp->if_softc;
1705         u_char fiber_type = IFM_1000_SX;
1706
1707         INIT_DEBUGOUT("em_media_status: begin");
1708
1709         EM_CORE_LOCK(adapter);
1710         em_update_link_status(adapter);
1711
1712         ifmr->ifm_status = IFM_AVALID;
1713         ifmr->ifm_active = IFM_ETHER;
1714
1715         if (!adapter->link_active) {
1716                 EM_CORE_UNLOCK(adapter);
1717                 return;
1718         }
1719
1720         ifmr->ifm_status |= IFM_ACTIVE;
1721
1722         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1723             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1724                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1725         } else {
1726                 switch (adapter->link_speed) {
1727                 case 10:
1728                         ifmr->ifm_active |= IFM_10_T;
1729                         break;
1730                 case 100:
1731                         ifmr->ifm_active |= IFM_100_TX;
1732                         break;
1733                 case 1000:
1734                         ifmr->ifm_active |= IFM_1000_T;
1735                         break;
1736                 }
1737                 if (adapter->link_duplex == FULL_DUPLEX)
1738                         ifmr->ifm_active |= IFM_FDX;
1739                 else
1740                         ifmr->ifm_active |= IFM_HDX;
1741         }
1742         EM_CORE_UNLOCK(adapter);
1743 }
1744
1745 /*********************************************************************
1746  *
1747  *  Media Ioctl callback
1748  *
1749  *  This routine is called when the user changes speed/duplex using
1750  *  the media/mediaopt options with ifconfig.
1751  *
1752  **********************************************************************/
1753 static int
1754 em_media_change(struct ifnet *ifp)
1755 {
1756         struct adapter *adapter = ifp->if_softc;
1757         struct ifmedia  *ifm = &adapter->media;
1758
1759         INIT_DEBUGOUT("em_media_change: begin");
1760
1761         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762                 return (EINVAL);
1763
1764         EM_CORE_LOCK(adapter);
1765         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766         case IFM_AUTO:
1767                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769                 break;
1770         case IFM_1000_LX:
1771         case IFM_1000_SX:
1772         case IFM_1000_T:
1773                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775                 break;
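                /*
                ** Gigabit is always reached via autonegotiation:
                ** 1000BASE-T requires it for master/slave resolution,
                ** so there is no forced 1000 Mbps path here.
                */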
1776         case IFM_100_TX:
1777                 adapter->hw.mac.autoneg = FALSE;
1778                 adapter->hw.phy.autoneg_advertised = 0;
1779                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781                 else
1782                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783                 break;
1784         case IFM_10_T:
1785                 adapter->hw.mac.autoneg = FALSE;
1786                 adapter->hw.phy.autoneg_advertised = 0;
1787                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789                 else
1790                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791                 break;
1792         default:
1793                 device_printf(adapter->dev, "Unsupported media type\n");
1794         }
1795
1796         em_init_locked(adapter);
1797         EM_CORE_UNLOCK(adapter);
1798
1799         return (0);
1800 }
1801
1802 /*********************************************************************
1803  *
1804  *  This routine maps the mbufs to tx descriptors.
1805  *
1806  *  return 0 on success, positive on failure
1807  **********************************************************************/
1808
1809 static int
1810 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811 {
1812         struct adapter          *adapter = txr->adapter;
1813         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1814         bus_dmamap_t            map;
1815         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1816         struct e1000_tx_desc    *ctxd = NULL;
1817         struct mbuf             *m_head;
1818         struct ether_header     *eh;
1819         struct ip               *ip = NULL;
1820         struct tcphdr           *tp = NULL;
1821         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1822         int                     ip_off, poff;
1823         int                     nsegs, i, j, first, last = 0;
1824         int                     error, do_tso, tso_desc = 0, remap = 1;
1825
1826 retry:
1827         m_head = *m_headp;
1828         txd_upper = txd_lower = txd_used = txd_saved = 0;
1829         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830         ip_off = poff = 0;
1831
1832         /*
1833          * Intel recommends entire IP/TCP header length reside in a single
1834          * buffer. If multiple descriptors are used to describe the IP and
1835          * TCP header, each descriptor should describe one or more
1836          * complete headers; descriptors referencing only parts of headers
1837          * are not supported. If all layer headers are not coalesced into
1838          * a single buffer, each buffer should not cross a 4KB boundary,
1839          * or be larger than the maximum read request size.
1840          * The controller also requires the IP/TCP header to be modified
1841          * for TSO to work, so we first get a writable mbuf chain and then
1842          * coalesce the ethernet/IP/TCP headers into a single buffer to
1843          * meet the controller's requirement. This also simplifies
1844          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1845          */
1846         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847                 if (do_tso || (m_head->m_next != NULL && 
1848                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849                         if (M_WRITABLE(*m_headp) == 0) {
1850                                 m_head = m_dup(*m_headp, M_NOWAIT);
1851                                 m_freem(*m_headp);
1852                                 if (m_head == NULL) {
1853                                         *m_headp = NULL;
1854                                         return (ENOBUFS);
1855                                 }
1856                                 *m_headp = m_head;
1857                         }
1858                 }
1859                 /*
1860                  * XXX
1861                  * Assume IPv4, we don't have TSO/checksum offload support
1862                  * for IPv6 yet.
1863                  */
1864                 ip_off = sizeof(struct ether_header);
1865                 m_head = m_pullup(m_head, ip_off);
1866                 if (m_head == NULL) {
1867                         *m_headp = NULL;
1868                         return (ENOBUFS);
1869                 }
1870                 eh = mtod(m_head, struct ether_header *);
1871                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1872                         ip_off = sizeof(struct ether_vlan_header);
1873                         m_head = m_pullup(m_head, ip_off);
1874                         if (m_head == NULL) {
1875                                 *m_headp = NULL;
1876                                 return (ENOBUFS);
1877                         }
1878                 }
1879                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1880                 if (m_head == NULL) {
1881                         *m_headp = NULL;
1882                         return (ENOBUFS);
1883                 }
1884                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885                 poff = ip_off + (ip->ip_hl << 2);
1886                 if (do_tso) {
1887                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888                         if (m_head == NULL) {
1889                                 *m_headp = NULL;
1890                                 return (ENOBUFS);
1891                         }
1892                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893                         /*
1894                          * TSO workaround: pull 4 more payload bytes in,
1895                          * matching the 4-byte sentinel split done below.
1896                          */
1897                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1898                         if (m_head == NULL) {
1899                                 *m_headp = NULL;
1900                                 return (ENOBUFS);
1901                         }
1902                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903                         ip->ip_len = 0;
1904                         ip->ip_sum = 0;
1905                         /*
1906                          * The TCP pseudo-header checksum must exclude the
1907                          * TCP payload length, so the driver recomputes it
1908                          * here the way the hardware expects to see it, per
1909                          * Microsoft's Large Send Offload specification.
1910                          */
1911                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1913                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1914                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1915                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1916                         if (m_head == NULL) {
1917                                 *m_headp = NULL;
1918                                 return (ENOBUFS);
1919                         }
1920                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1922                         if (m_head == NULL) {
1923                                 *m_headp = NULL;
1924                                 return (ENOBUFS);
1925                         }
1926                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1928                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1929                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1930                         if (m_head == NULL) {
1931                                 *m_headp = NULL;
1932                                 return (ENOBUFS);
1933                         }
1934                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935                 }
1936                 *m_headp = m_head;
1937         }
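
        /*
        ** At this point, for offloaded packets, the ethernet/IP/
        ** TCP (or UDP) headers sit contiguously in the first mbuf
        ** and ip/tp point into it; the em_tso_setup() and
        ** em_transmit_checksum_setup() calls below rely on that.
        */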
1938
1939         /*
1940          * Map the packet for DMA
1941          *
1942          * Capture the first descriptor index;
1943          * this descriptor will later hold the index
1944          * of the EOP which is the only one that
1945          * now gets a DONE bit writeback.
1946          */
1947         first = txr->next_avail_desc;
1948         tx_buffer = &txr->tx_buffers[first];
1949         tx_buffer_mapped = tx_buffer;
1950         map = tx_buffer->map;
1951
1952         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1953             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1954
1955         /*
1956          * There are two types of errors we can (try) to handle:
1957          * - EFBIG means the mbuf chain was too long and bus_dma ran
1958          *   out of segments.  Defragment the mbuf chain and try again.
1959          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1960          *   at this point in time.  Defer sending and try again later.
1961          * All other errors, in particular EINVAL, are fatal and prevent the
1962          * mbuf chain from ever going through.  Drop it and report error.
1963          */
1964         if (error == EFBIG && remap) {
1965                 struct mbuf *m;
1966
1967                 m = m_defrag(*m_headp, M_NOWAIT);
1968                 if (m == NULL) {
1969                         adapter->mbuf_alloc_failed++;
1970                         m_freem(*m_headp);
1971                         *m_headp = NULL;
1972                         return (ENOBUFS);
1973                 }
1974                 *m_headp = m;
1975
1976                 /* Try it again, but only once */
1977                 remap = 0;
1978                 goto retry;
1979         } else if (error == ENOMEM) {
1980                 adapter->no_tx_dma_setup++;
1981                 return (error);
1982         } else if (error != 0) {
1983                 adapter->no_tx_dma_setup++;
1984                 m_freem(*m_headp);
1985                 *m_headp = NULL;
1986                 return (error);
1987         }
1988
1989         /*
1990          * TSO Hardware workaround, if this packet is not
1991          * TSO, and is only a single descriptor long, and
1992          * it follows a TSO burst, then we need to add a
1993          * sentinel descriptor to prevent premature writeback.
1994          */
1995         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1996                 if (nsegs == 1)
1997                         tso_desc = TRUE;
1998                 txr->tx_tso = FALSE;
1999         }
2000
2001         if (nsegs > (txr->tx_avail - 2)) {
2002                 txr->no_desc_avail++;
2003                 bus_dmamap_unload(txr->txtag, map);
2004                 return (ENOBUFS);
2005         }
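        /*
        ** (The "- 2" above appears to reserve headroom for the TSO
        ** sentinel descriptor plus one guard slot -- an inference
        ** from the sentinel logic below, not a documented rule.)
        */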
2006         m_head = *m_headp;
2007
2008         /* Do hardware assists */
2009         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2010                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2011                     &txd_upper, &txd_lower);
2012                 /* we need to make a final sentinel transmit desc */
2013                 tso_desc = TRUE;
2014         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2015                 em_transmit_checksum_setup(txr, m_head,
2016                     ip_off, ip, &txd_upper, &txd_lower);
2017
2018         if (m_head->m_flags & M_VLANTAG) {
2019                 /* Set the vlan id. */
2020                 txd_upper |=
2021                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2022                 /* Tell hardware to add tag */
2023                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2024         }
2025
2026         i = txr->next_avail_desc;
2027
2028         /* Set up our transmit descriptors */
2029         for (j = 0; j < nsegs; j++) {
2030                 bus_size_t seg_len;
2031                 bus_addr_t seg_addr;
2032
2033                 tx_buffer = &txr->tx_buffers[i];
2034                 ctxd = &txr->tx_base[i];
2035                 seg_addr = segs[j].ds_addr;
2036                 seg_len  = segs[j].ds_len;
2037                 /*
2038                 ** TSO Workaround:
2039                 ** If this is the last descriptor, we want to
2040                 ** split it so we have a small final sentinel
2041                 */
2042                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2043                         seg_len -= 4;
2044                         ctxd->buffer_addr = htole64(seg_addr);
2045                         ctxd->lower.data = htole32(
2046                         adapter->txd_cmd | txd_lower | seg_len);
2047                         ctxd->upper.data =
2048                             htole32(txd_upper);
2049                         if (++i == adapter->num_tx_desc)
2050                                 i = 0;
2051                         /* Now make the sentinel */     
2052                         ++txd_used; /* using an extra txd */
2053                         ctxd = &txr->tx_base[i];
2054                         tx_buffer = &txr->tx_buffers[i];
2055                         ctxd->buffer_addr =
2056                             htole64(seg_addr + seg_len);
2057                         ctxd->lower.data = htole32(
2058                         adapter->txd_cmd | txd_lower | 4);
2059                         ctxd->upper.data =
2060                             htole32(txd_upper);
2061                         last = i;
2062                         if (++i == adapter->num_tx_desc)
2063                                 i = 0;
2064                 } else {
2065                         ctxd->buffer_addr = htole64(seg_addr);
2066                         ctxd->lower.data = htole32(
2067                         adapter->txd_cmd | txd_lower | seg_len);
2068                         ctxd->upper.data =
2069                             htole32(txd_upper);
2070                         last = i;
2071                         if (++i == adapter->num_tx_desc)
2072                                 i = 0;
2073                 }
2074                 tx_buffer->m_head = NULL;
2075                 tx_buffer->next_eop = -1;
2076         }
2077
2078         txr->next_avail_desc = i;
2079         txr->tx_avail -= nsegs;
2080         if (tso_desc) /* TSO used an extra for sentinel */
2081                 txr->tx_avail -= txd_used;
2082
2083         tx_buffer->m_head = m_head;
2084         /*
2085         ** Here we swap the map so the last descriptor,
2086         ** which gets the completion interrupt, has the
2087         ** real map, and the first descriptor gets the
2088         ** unused map from this last descriptor.
2089         */
2090         tx_buffer_mapped->map = tx_buffer->map;
2091         tx_buffer->map = map;
2092         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2093
2094         /*
2095          * Last Descriptor of Packet
2096          * needs End Of Packet (EOP)
2097          * and Report Status (RS)
2098          */
2099         ctxd->lower.data |=
2100             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2101         /*
2102          * Keep track in the first buffer which
2103          * descriptor will be written back
2104          */
2105         tx_buffer = &txr->tx_buffers[first];
2106         tx_buffer->next_eop = last;
2107         /* Update the watchdog time early and often */
2108         txr->watchdog_time = ticks;
2109
2110         /*
2111          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2112          * that this frame is available to transmit.
2113          */
2114         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2115             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2116         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2117
2118         return (0);
2119 }
2120
2121 static void
2122 em_set_promisc(struct adapter *adapter)
2123 {
2124         struct ifnet    *ifp = adapter->ifp;
2125         u32             reg_rctl;
2126
2127         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128
2129         if (ifp->if_flags & IFF_PROMISC) {
2130                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2131                 /* Turn this on if you want to see bad packets */
2132                 if (em_debug_sbp)
2133                         reg_rctl |= E1000_RCTL_SBP;
2134                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135         } else if (ifp->if_flags & IFF_ALLMULTI) {
2136                 reg_rctl |= E1000_RCTL_MPE;
2137                 reg_rctl &= ~E1000_RCTL_UPE;
2138                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139         }
2140 }
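
/*
** Note: em_set_promisc() only sets UPE/MPE; clearing them when the
** corresponding flags are dropped is em_disable_promisc()'s job.
*/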
2141
2142 static void
2143 em_disable_promisc(struct adapter *adapter)
2144 {
2145         struct ifnet    *ifp = adapter->ifp;
2146         u32             reg_rctl;
2147         int             mcnt = 0;
2148
2149         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2150         reg_rctl &=  (~E1000_RCTL_UPE);
2151         if (ifp->if_flags & IFF_ALLMULTI)
2152                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2153         else {
2154                 struct  ifmultiaddr *ifma;
2155 #if __FreeBSD_version < 800000
2156                 IF_ADDR_LOCK(ifp);
2157 #else   
2158                 if_maddr_rlock(ifp);
2159 #endif
2160                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2161                         if (ifma->ifma_addr->sa_family != AF_LINK)
2162                                 continue;
2163                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2164                                 break;
2165                         mcnt++;
2166                 }
2167 #if __FreeBSD_version < 800000
2168                 IF_ADDR_UNLOCK(ifp);
2169 #else
2170                 if_maddr_runlock(ifp);
2171 #endif
2172         }
2173         /* Don't disable if in MAX groups */
2174         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2175                 reg_rctl &=  (~E1000_RCTL_MPE);
2176         reg_rctl &=  (~E1000_RCTL_SBP);
2177         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2178 }
2179
2180
2181 /*********************************************************************
2182  *  Multicast Update
2183  *
2184  *  This routine is called whenever the multicast address list is updated.
2185  *
2186  **********************************************************************/
2187
2188 static void
2189 em_set_multi(struct adapter *adapter)
2190 {
2191         struct ifnet    *ifp = adapter->ifp;
2192         struct ifmultiaddr *ifma;
2193         u32 reg_rctl = 0;
2194         u8  *mta; /* Multicast array memory */
2195         int mcnt = 0;
2196
2197         IOCTL_DEBUGOUT("em_set_multi: begin");
2198
2199         mta = adapter->mta;
2200         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2201
2202         if (adapter->hw.mac.type == e1000_82542 && 
2203             adapter->hw.revision_id == E1000_REVISION_2) {
2204                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2205                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206                         e1000_pci_clear_mwi(&adapter->hw);
2207                 reg_rctl |= E1000_RCTL_RST;
2208                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2209                 msec_delay(5);
2210         }
2211
2212 #if __FreeBSD_version < 800000
2213         IF_ADDR_LOCK(ifp);
2214 #else
2215         if_maddr_rlock(ifp);
2216 #endif
2217         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2218                 if (ifma->ifma_addr->sa_family != AF_LINK)
2219                         continue;
2220
2221                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2222                         break;
2223
2224                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2225                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2226                 mcnt++;
2227         }
2228 #if __FreeBSD_version < 800000
2229         IF_ADDR_UNLOCK(ifp);
2230 #else
2231         if_maddr_runlock(ifp);
2232 #endif
2233         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2234                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2235                 reg_rctl |= E1000_RCTL_MPE;
2236                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237         } else
2238                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2239
2240         if (adapter->hw.mac.type == e1000_82542 && 
2241             adapter->hw.revision_id == E1000_REVISION_2) {
2242                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243                 reg_rctl &= ~E1000_RCTL_RST;
2244                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2245                 msec_delay(5);
2246                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2247                         e1000_pci_set_mwi(&adapter->hw);
2248         }
2249 }
2250
2251
2252 /*********************************************************************
2253  *  Timer routine
2254  *
2255  *  This routine checks for link status and updates statistics.
2256  *
2257  **********************************************************************/
2258
2259 static void
2260 em_local_timer(void *arg)
2261 {
2262         struct adapter  *adapter = arg;
2263         struct ifnet    *ifp = adapter->ifp;
2264         struct tx_ring  *txr = adapter->tx_rings;
2265         struct rx_ring  *rxr = adapter->rx_rings;
2266         u32             trigger;
2267
2268         EM_CORE_LOCK_ASSERT(adapter);
2269
2270         em_update_link_status(adapter);
2271         em_update_stats_counters(adapter);
2272
2273         /* Reset LAA into RAR[0] on 82571 */
2274         if ((adapter->hw.mac.type == e1000_82571) &&
2275             e1000_get_laa_state_82571(&adapter->hw))
2276                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2277
2278         /* Mask to use in the irq trigger */
2279         if (adapter->msix_mem)
2280                 trigger = rxr->ims; /* RX for 82574 */
2281         else
2282                 trigger = E1000_ICS_RXDMT0;
2283
2284         /*
2285         ** Check on the state of the TX queue(s); this
2286         ** can be done without the lock because it's RO
2287         ** and the HUNG state will be static if set.
2288         */
2289         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2290                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2291                     (adapter->pause_frames == 0))
2292                         goto hung;
2293                 /* Schedule a TX task if needed */
2294                 if (txr->tx_avail <= EM_MAX_SCATTER)
2295                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2296         }
2297         
2298         adapter->pause_frames = 0;
2299         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2300 #ifndef DEVICE_POLLING
2301         /* Trigger an RX interrupt to guarantee mbuf refresh */
2302         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2303 #endif
2304         return;
2305 hung:
2306         /* Looks like we're hung */
2307         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2308         device_printf(adapter->dev,
2309             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2310             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2311             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2312         device_printf(adapter->dev,"TX(%d) desc avail = %d, "
2313             "Next TX to Clean = %d\n",
2314             txr->me, txr->tx_avail, txr->next_to_clean);
2315         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2316         adapter->watchdog_events++;
2317         adapter->pause_frames = 0;
2318         em_init_locked(adapter);
2319 }
2320
2321
2322 static void
2323 em_update_link_status(struct adapter *adapter)
2324 {
2325         struct e1000_hw *hw = &adapter->hw;
2326         struct ifnet *ifp = adapter->ifp;
2327         device_t dev = adapter->dev;
2328         struct tx_ring *txr = adapter->tx_rings;
2329         u32 link_check = 0;
2330
2331         /* Get the cached link value or read phy for real */
2332         switch (hw->phy.media_type) {
2333         case e1000_media_type_copper:
2334                 if (hw->mac.get_link_status) {
2335                         /* Do the work to read phy */
2336                         e1000_check_for_link(hw);
2337                         link_check = !hw->mac.get_link_status;
2338                         if (link_check) /* ESB2 fix */
2339                                 e1000_cfg_on_link_up(hw);
2340                 } else
2341                         link_check = TRUE;
2342                 break;
2343         case e1000_media_type_fiber:
2344                 e1000_check_for_link(hw);
2345                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2346                                  E1000_STATUS_LU);
2347                 break;
2348         case e1000_media_type_internal_serdes:
2349                 e1000_check_for_link(hw);
2350                 link_check = adapter->hw.mac.serdes_has_link;
2351                 break;
2352         default:
2353         case e1000_media_type_unknown:
2354                 break;
2355         }
2356
2357         /* Now check for a transition */
2358         if (link_check && (adapter->link_active == 0)) {
2359                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2360                     &adapter->link_duplex);
2361                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2362                 if ((adapter->link_speed != SPEED_1000) &&
2363                     ((hw->mac.type == e1000_82571) ||
2364                     (hw->mac.type == e1000_82572))) {
2365                         int tarc0;
2366                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2367                         tarc0 &= ~SPEED_MODE_BIT;
2368                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2369                 }
2370                 if (bootverbose)
2371                         device_printf(dev, "Link is up %d Mbps %s\n",
2372                             adapter->link_speed,
2373                             ((adapter->link_duplex == FULL_DUPLEX) ?
2374                             "Full Duplex" : "Half Duplex"));
2375                 adapter->link_active = 1;
2376                 adapter->smartspeed = 0;
2377                 ifp->if_baudrate = adapter->link_speed * 1000000;
2378                 if_link_state_change(ifp, LINK_STATE_UP);
2379         } else if (!link_check && (adapter->link_active == 1)) {
2380                 ifp->if_baudrate = adapter->link_speed = 0;
2381                 adapter->link_duplex = 0;
2382                 if (bootverbose)
2383                         device_printf(dev, "Link is Down\n");
2384                 adapter->link_active = 0;
2385                 /* Link down, disable watchdog */
2386                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2387                         txr->queue_status = EM_QUEUE_IDLE;
2388                 if_link_state_change(ifp, LINK_STATE_DOWN);
2389         }
2390 }
2391
2392 /*********************************************************************
2393  *
2394  *  This routine disables all traffic on the adapter by issuing a
2395  *  global reset on the MAC and deallocates TX/RX buffers.
2396  *
2397  *  This routine should always be called with BOTH the CORE
2398  *  and TX locks.
2399  **********************************************************************/
2400
2401 static void
2402 em_stop(void *arg)
2403 {
2404         struct adapter  *adapter = arg;
2405         struct ifnet    *ifp = adapter->ifp;
2406         struct tx_ring  *txr = adapter->tx_rings;
2407
2408         EM_CORE_LOCK_ASSERT(adapter);
2409
2410         INIT_DEBUGOUT("em_stop: begin");
2411
2412         em_disable_intr(adapter);
2413         callout_stop(&adapter->timer);
2414
2415         /* Tell the stack that the interface is no longer active */
2416         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2417         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2418
2419         /* Unarm watchdog timer. */
2420         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2421                 EM_TX_LOCK(txr);
2422                 txr->queue_status = EM_QUEUE_IDLE;
2423                 EM_TX_UNLOCK(txr);
2424         }
2425
2426         e1000_reset_hw(&adapter->hw);
2427         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2428
2429         e1000_led_off(&adapter->hw);
2430         e1000_cleanup_led(&adapter->hw);
2431 }
2432
2433
2434 /*********************************************************************
2435  *
2436  *  Determine hardware revision.
2437  *
2438  **********************************************************************/
2439 static void
2440 em_identify_hardware(struct adapter *adapter)
2441 {
2442         device_t dev = adapter->dev;
2443
2444         /* Make sure bus mastering and memory access are enabled in PCI config space */
2445         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2446         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2447             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2448                 device_printf(dev, "Memory Access and/or Bus Master bits "
2449                     "were not set!\n");
2450                 adapter->hw.bus.pci_cmd_word |=
2451                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2452                 pci_write_config(dev, PCIR_COMMAND,
2453                     adapter->hw.bus.pci_cmd_word, 2);
2454         }
2455
2456         /* Save off the information about this board */
2457         adapter->hw.vendor_id = pci_get_vendor(dev);
2458         adapter->hw.device_id = pci_get_device(dev);
2459         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2460         adapter->hw.subsystem_vendor_id =
2461             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2462         adapter->hw.subsystem_device_id =
2463             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2464
2465         /* Do Shared Code Init and Setup */
2466         if (e1000_set_mac_type(&adapter->hw)) {
2467                 device_printf(dev, "Setup init failure\n");
2468                 return;
2469         }
2470 }
2471
2472 static int
2473 em_allocate_pci_resources(struct adapter *adapter)
2474 {
2475         device_t        dev = adapter->dev;
2476         int             rid;
2477
2478         rid = PCIR_BAR(0);
2479         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2480             &rid, RF_ACTIVE);
2481         if (adapter->memory == NULL) {
2482                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2483                 return (ENXIO);
2484         }
2485         adapter->osdep.mem_bus_space_tag =
2486             rman_get_bustag(adapter->memory);
2487         adapter->osdep.mem_bus_space_handle =
2488             rman_get_bushandle(adapter->memory);
2489         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2490
2491         /* Default to a single queue */
2492         adapter->num_queues = 1;
2493
2494         /*
2495          * Setup MSI/X or MSI if PCI Express
2496          */
2497         adapter->msix = em_setup_msix(adapter);
2498
2499         adapter->hw.back = &adapter->osdep;
2500
2501         return (0);
2502 }
2503
2504 /*********************************************************************
2505  *
2506  *  Setup the Legacy or MSI Interrupt handler
2507  *
2508  **********************************************************************/
2509 int
2510 em_allocate_legacy(struct adapter *adapter)
2511 {
2512         device_t dev = adapter->dev;
2513         struct tx_ring  *txr = adapter->tx_rings;
2514         int error, rid = 0;
2515
2516         /* Manually turn off all interrupts */
2517         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2518
2519         if (adapter->msix == 1) /* using MSI */
2520                 rid = 1;
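        /* (IRQ rid 0 is the legacy INTx resource; MSI messages start at rid 1) */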
2521         /* We allocate a single interrupt resource */
2522         adapter->res = bus_alloc_resource_any(dev,
2523             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2524         if (adapter->res == NULL) {
2525                 device_printf(dev, "Unable to allocate bus resource: "
2526                     "interrupt\n");
2527                 return (ENXIO);
2528         }
2529
2530         /*
2531          * Allocate a fast interrupt and the associated
2532          * deferred processing contexts.
2533          */
2534         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2535         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2536             taskqueue_thread_enqueue, &adapter->tq);
2537         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2538             device_get_nameunit(adapter->dev));
2539         /* Use a TX-only task, scheduled from the local timer */
2540         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2541         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2542             taskqueue_thread_enqueue, &txr->tq);
2543         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2544             device_get_nameunit(adapter->dev));
2545         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2546         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2547             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2548                 device_printf(dev, "Failed to register fast interrupt "
2549                             "handler: %d\n", error);
2550                 taskqueue_free(adapter->tq);
2551                 adapter->tq = NULL;
2552                 return (error);
2553         }
2554         
2555         return (0);
2556 }
2557
2558 /*********************************************************************
2559  *
2560  *  Setup the MSIX Interrupt handlers
2561  *   This is not really Multiqueue, rather
2562  *   it's just separate interrupt vectors
2563  *   for TX, RX, and Link.
2564  *
2565  **********************************************************************/
2566 int
2567 em_allocate_msix(struct adapter *adapter)
2568 {
2569         device_t        dev = adapter->dev;
2570         struct          tx_ring *txr = adapter->tx_rings;
2571         struct          rx_ring *rxr = adapter->rx_rings;
2572         int             error, rid, vector = 0;
2573
2574
2575         /* Make sure all interrupts are disabled */
2576         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2577
2578         /* First set up ring resources */
2579         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2580
2581                 /* RX ring */
2582                 rid = vector + 1;
2583
2584                 rxr->res = bus_alloc_resource_any(dev,
2585                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2586                 if (rxr->res == NULL) {
2587                         device_printf(dev,
2588                             "Unable to allocate bus resource: "
2589                             "RX MSIX Interrupt %d\n", i);
2590                         return (ENXIO);
2591                 }
2592                 if ((error = bus_setup_intr(dev, rxr->res,
2593                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2594                     rxr, &rxr->tag)) != 0) {
2595                         device_printf(dev, "Failed to register RX handler");
2596                         return (error);
2597                 }
2598 #if __FreeBSD_version >= 800504
2599                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2600 #endif
2601                 rxr->msix = vector++; /* NOTE increment vector for TX */
2602                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2603                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2604                     taskqueue_thread_enqueue, &rxr->tq);
2605                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2606                     device_get_nameunit(adapter->dev));
2607                 /*
2608                 ** Set the bit to enable interrupt
2609                 ** in E1000_IMS -- bits 20 and 21
2610                 ** are for RX0 and RX1, note this has
2611                 ** NOTHING to do with the MSIX vector
2612                 */
2613                 rxr->ims = 1 << (20 + i);
2614                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2615
2616                 /* TX ring */
2617                 rid = vector + 1;
2618                 txr->res = bus_alloc_resource_any(dev,
2619                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2620                 if (txr->res == NULL) {
2621                         device_printf(dev,
2622                             "Unable to allocate bus resource: "
2623                             "TX MSIX Interrupt %d\n", i);
2624                         return (ENXIO);
2625                 }
2626                 if ((error = bus_setup_intr(dev, txr->res,
2627                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2628                     txr, &txr->tag)) != 0) {
2629                         device_printf(dev, "Failed to register TX handler");
2630                         return (error);
2631                 }
2632 #if __FreeBSD_version >= 800504
2633                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2634 #endif
2635                 txr->msix = vector++; /* Increment vector for next pass */
2636                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2637                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2638                     taskqueue_thread_enqueue, &txr->tq);
2639                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2640                     device_get_nameunit(adapter->dev));
2641                 /*
2642                 ** Set the bit to enable interrupt
2643                 ** in E1000_IMS -- bits 22 and 23
2644                 ** are for TX0 and TX1, note this has
2645                 ** NOTHING to do with the MSIX vector
2646                 */
2647                 txr->ims = 1 << (22 + i);
2648                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2649         }
2650
2651         /* Link interrupt */
2652         ++rid;
2653         adapter->res = bus_alloc_resource_any(dev,
2654             SYS_RES_IRQ, &rid, RF_ACTIVE);
2655         if (!adapter->res) {
2656                 device_printf(dev,"Unable to allocate "
2657                     "bus resource: Link interrupt [%d]\n", rid);
2658                 return (ENXIO);
2659         }
2660         /* Set the link handler function */
2661         error = bus_setup_intr(dev, adapter->res,
2662             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2663             em_msix_link, adapter, &adapter->tag);
2664         if (error) {
2665                 adapter->res = NULL;
2666                 device_printf(dev, "Failed to register LINK handler");
2667                 return (error);
2668         }
2669 #if __FreeBSD_version >= 800504
2670                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2671 #endif
2672         adapter->linkvec = vector;
2673         adapter->ivars |=  (8 | vector) << 16;
2674         adapter->ivars |= 0x80000000;
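
        /*
        ** Worked example of the resulting IVAR image with one queue:
        ** rxr->msix == 0, txr->msix == 1 and linkvec == 2 yield
        **   ivars = 0x8 | (0x9 << 8) | (0xA << 16) | 0x80000000
        **         = 0x800a0908
        ** i.e. each 4-bit field carries (0x8 "valid" | vector).
        */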
2675
2676         return (0);
2677 }
2678
2679
2680 static void
2681 em_free_pci_resources(struct adapter *adapter)
2682 {
2683         device_t        dev = adapter->dev;
2684         struct tx_ring  *txr;
2685         struct rx_ring  *rxr;
2686         int             rid;
2687
2688
2689         /*
2690         ** Release all the queue interrupt resources:
2691         */
2692         for (int i = 0; i < adapter->num_queues; i++) {
2693                 txr = &adapter->tx_rings[i];
2694                 rxr = &adapter->rx_rings[i];
2695                 /* an early abort? */
2696                 if ((txr == NULL) || (rxr == NULL))
2697                         break;
2698                 rid = txr->msix +1;
2699                 if (txr->tag != NULL) {
2700                         bus_teardown_intr(dev, txr->res, txr->tag);
2701                         txr->tag = NULL;
2702                 }
2703                 if (txr->res != NULL)
2704                         bus_release_resource(dev, SYS_RES_IRQ,
2705                             rid, txr->res);
2706                 rid = rxr->msix +1;
2707                 if (rxr->tag != NULL) {
2708                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2709                         rxr->tag = NULL;
2710                 }
2711                 if (rxr->res != NULL)
2712                         bus_release_resource(dev, SYS_RES_IRQ,
2713                             rid, rxr->res);
2714         }
2715
2716         if (adapter->linkvec) /* we are doing MSIX */
2717                 rid = adapter->linkvec + 1;
2718         else
2719                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
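        /* (i.e. rid is linkvec + 1 under MSIX, 1 under MSI, 0 for INTx) */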
2720
2721         if (adapter->tag != NULL) {
2722                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2723                 adapter->tag = NULL;
2724         }
2725
2726         if (adapter->res != NULL)
2727                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2728
2729
2730         if (adapter->msix)
2731                 pci_release_msi(dev);
2732
2733         if (adapter->msix_mem != NULL)
2734                 bus_release_resource(dev, SYS_RES_MEMORY,
2735                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2736
2737         if (adapter->memory != NULL)
2738                 bus_release_resource(dev, SYS_RES_MEMORY,
2739                     PCIR_BAR(0), adapter->memory);
2740
2741         if (adapter->flash != NULL)
2742                 bus_release_resource(dev, SYS_RES_MEMORY,
2743                     EM_FLASH, adapter->flash);
2744 }
2745
2746 /*
2747  * Setup MSI or MSI/X
2748  */
2749 static int
2750 em_setup_msix(struct adapter *adapter)
2751 {
2752         device_t dev = adapter->dev;
2753         int val;
2754
2755         /*
2756         ** Setup MSI/X for Hartwell: tests have shown
2757         ** use of two queues to be unstable, and to
2758         ** provide no great gain anyway, so we simply
2759         ** separate the interrupts and use a single queue.
2760         */
2761         if ((adapter->hw.mac.type == e1000_82574) &&
2762             (em_enable_msix == TRUE)) {
2763                 /* Map the MSIX BAR */
2764                 int rid = PCIR_BAR(EM_MSIX_BAR);
2765                 adapter->msix_mem = bus_alloc_resource_any(dev,
2766                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2767                 if (adapter->msix_mem == NULL) {
2768                         /* May not be enabled */
2769                         device_printf(adapter->dev,
2770                             "Unable to map MSIX table\n");
2771                         goto msi;
2772                 }
2773                 val = pci_msix_count(dev); 
2774                 /* We only need/want 3 vectors */
2775                 if (val >= 3)
2776                         val = 3;
2777                 else {
2778                         bus_release_resource(dev, SYS_RES_MEMORY,
2779                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2780                         adapter->msix_mem = NULL;
2781                         device_printf(adapter->dev,
2782                             "MSIX: insufficient vectors (have %d, need 3), using MSI\n", val);
2783                         goto msi;
2784                 }
2785
2786                 if (pci_alloc_msix(dev, &val) == 0) {
2787                         device_printf(adapter->dev,
2788                             "Using MSIX interrupts "
2789                             "with %d vectors\n", val);
2790                         return (val);
2791                 }
2792                 /* Fall through to MSI */
2793         }
2794 msi:
2795         val = 1;
2796         if (pci_alloc_msi(dev, &val) == 0) {
2797                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2798                 return (val);
2799         } 
2800         /* Should only happen due to manual configuration */
2801         device_printf(adapter->dev, "No MSI/MSIX available, using a Legacy IRQ\n");
2802         return (0);
2803 }
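     /*
      * A rough summary of the contract above: em_setup_msix() returns 3
      * when MSIX is usable (separate RX, TX and link vectors on 82574),
      * 1 when it falls back to plain MSI, and 0 when only a legacy INTx
      * interrupt remains. The caller presumably records this count (e.g.
      * in adapter->msix, consulted by em_free_pci_resources() above) to
      * steer the interrupt setup and teardown paths.
      */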
2804
2805
2806 /*********************************************************************
2807  *
2808  *  Initialize the hardware to a configuration
2809  *  as specified by the adapter structure.
2810  *
2811  **********************************************************************/
2812 static void
2813 em_reset(struct adapter *adapter)
2814 {
2815         device_t        dev = adapter->dev;
2816         struct ifnet    *ifp = adapter->ifp;
2817         struct e1000_hw *hw = &adapter->hw;
2818         u16             rx_buffer_size;
2819         u32             pba;
2820
2821         INIT_DEBUGOUT("em_reset: begin");
2822
2823         /* Set up smart power down as default off on newer adapters. */
2824         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2825             hw->mac.type == e1000_82572)) {
2826                 u16 phy_tmp = 0;
2827
2828                 /* Speed up time to link by disabling smart power down. */
2829                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2830                 phy_tmp &= ~IGP02E1000_PM_SPD;
2831                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2832         }
2833
2834         /*
2835          * Packet Buffer Allocation (PBA)
2836          * Writing PBA sets the receive portion of the buffer;
2837          * the remainder is used for the transmit buffer.
2838          */
2839         switch (hw->mac.type) {
2840         /* Total Packet Buffer on these is 48K */
2841         case e1000_82571:
2842         case e1000_82572:
2843         case e1000_80003es2lan:
2844                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2845                 break;
2846         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2847                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2848                 break;
2849         case e1000_82574:
2850         case e1000_82583:
2851                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2852                 break;
2853         case e1000_ich8lan:
2854                 pba = E1000_PBA_8K;
2855                 break;
2856         case e1000_ich9lan:
2857         case e1000_ich10lan:
2858                 /* Boost Receive side for jumbo frames */
2859                 if (adapter->hw.mac.max_frame_size > 4096)
2860                         pba = E1000_PBA_14K;
2861                 else
2862                         pba = E1000_PBA_10K;
2863                 break;
2864         case e1000_pchlan:
2865         case e1000_pch2lan:
2866         case e1000_pch_lpt:
2867                 pba = E1000_PBA_26K;
2868                 break;
2869         default:
2870                 if (adapter->hw.mac.max_frame_size > 8192)
2871                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2872                 else
2873                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2874         }
2875         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2876
2877         /*
2878          * These parameters control the automatic generation (Tx) and
2879          * response (Rx) to Ethernet PAUSE frames.
2880          * - High water mark should allow for at least two frames to be
2881          *   received after sending an XOFF.
2882          * - Low water mark works best when it is very near the high water mark.
2883          *   This allows the receiver to restart by sending XON when it has
2884          *   drained a bit. Here we use an arbitrary value of 1500 which will
2885          *   restart after one full frame is pulled from the buffer. There
2886          *   could be several smaller frames in the buffer and if so they will
2887          *   not trigger the XON until their total number reduces the buffer
2888          *   by 1500.
2889          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2890          */
2891         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2892         hw->fc.high_water = rx_buffer_size -
2893             roundup2(adapter->hw.mac.max_frame_size, 1024);
2894         hw->fc.low_water = hw->fc.high_water - 1500;
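             /*
             ** Worked example (a sketch, assuming E1000_PBA_32K == 0x20
             ** and a standard 1518-byte max frame): the PBA low word
             ** counts kilobytes, so rx_buffer_size = 0x20 << 10 = 32768
             ** bytes. Then roundup2(1518, 1024) = 2048, giving
             ** high_water = 32768 - 2048 = 30720 and
             ** low_water = 30720 - 1500 = 29220.
             */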
2895
2896         if (adapter->fc) /* locally set flow control value? */
2897                 hw->fc.requested_mode = adapter->fc;
2898         else
2899                 hw->fc.requested_mode = e1000_fc_full;
2900
2901         if (hw->mac.type == e1000_80003es2lan)
2902                 hw->fc.pause_time = 0xFFFF;
2903         else
2904                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2905
2906         hw->fc.send_xon = TRUE;
2907
2908         /* Device specific overrides/settings */
2909         switch (hw->mac.type) {
2910         case e1000_pchlan:
2911                 /* Workaround: no TX flow ctrl for PCH */
2912                 hw->fc.requested_mode = e1000_fc_rx_pause;
2913                 hw->fc.pause_time = 0xFFFF; /* override */
2914                 if (ifp->if_mtu > ETHERMTU) {
2915                         hw->fc.high_water = 0x3500;
2916                         hw->fc.low_water = 0x1500;
2917                 } else {
2918                         hw->fc.high_water = 0x5000;
2919                         hw->fc.low_water = 0x3000;
2920                 }
2921                 hw->fc.refresh_time = 0x1000;
2922                 break;
2923         case e1000_pch2lan:
2924         case e1000_pch_lpt:
2925                 hw->fc.high_water = 0x5C20;
2926                 hw->fc.low_water = 0x5048;
2927                 hw->fc.pause_time = 0x0650;
2928                 hw->fc.refresh_time = 0x0400;
2929                 /* Jumbos need adjusted PBA */
2930                 if (ifp->if_mtu > ETHERMTU)
2931                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2932                 else
2933                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2934                 break;
2935         case e1000_ich9lan:
2936         case e1000_ich10lan:
2937                 if (ifp->if_mtu > ETHERMTU) {
2938                         hw->fc.high_water = 0x2800;
2939                         hw->fc.low_water = hw->fc.high_water - 8;
2940                         break;
2941                 } 
2942                 /* else fall thru */
2943         default:
2944                 if (hw->mac.type == e1000_80003es2lan)
2945                         hw->fc.pause_time = 0xFFFF;
2946                 break;
2947         }
2948
2949         /* Issue a global reset */
2950         e1000_reset_hw(hw);
2951         E1000_WRITE_REG(hw, E1000_WUC, 0);
2952         em_disable_aspm(adapter);
2953         /* and a re-init */
2954         if (e1000_init_hw(hw) < 0) {
2955                 device_printf(dev, "Hardware Initialization Failed\n");
2956                 return;
2957         }
2958
2959         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2960         e1000_get_phy_info(hw);
2961         e1000_check_for_link(hw);
2962         return;
2963 }
2964
2965 /*********************************************************************
2966  *
2967  *  Setup networking device structure and register an interface.
2968  *
2969  **********************************************************************/
2970 static int
2971 em_setup_interface(device_t dev, struct adapter *adapter)
2972 {
2973         struct ifnet   *ifp;
2974
2975         INIT_DEBUGOUT("em_setup_interface: begin");
2976
2977         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2978         if (ifp == NULL) {
2979                 device_printf(dev, "can not allocate ifnet structure\n");
2980                 return (-1);
2981         }
2982         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2983         ifp->if_init =  em_init;
2984         ifp->if_softc = adapter;
2985         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2986         ifp->if_ioctl = em_ioctl;
2987 #ifdef EM_MULTIQUEUE
2988         /* Multiqueue stack interface */
2989         ifp->if_transmit = em_mq_start;
2990         ifp->if_qflush = em_qflush;
2991 #else
2992         ifp->if_start = em_start;
2993         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2994         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2995         IFQ_SET_READY(&ifp->if_snd);
2996 #endif  
2997
2998         ether_ifattach(ifp, adapter->hw.mac.addr);
2999
3000         ifp->if_capabilities = ifp->if_capenable = 0;
3001
3003         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3004         ifp->if_capabilities |= IFCAP_TSO4;
3005         /*
3006          * Tell the upper layer(s) we
3007          * support full VLAN capability
3008          */
3009         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3010         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3011                              |  IFCAP_VLAN_HWTSO
3012                              |  IFCAP_VLAN_MTU;
3013         ifp->if_capenable = ifp->if_capabilities;
3014
3015         /*
3016         ** Don't turn this on by default: if vlans are
3017         ** created on another pseudo device (e.g. lagg),
3018         ** vlan events are not passed through, breaking
3019         ** operation, but with HW FILTER off it works. If
3020         ** using vlans directly on the em driver you can
3021         ** enable this and get full hardware tag filtering.
3022         */
3023         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
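             /*
             ** For example, the filter can then be toggled at runtime
             ** with "ifconfig em0 vlanhwfilter" / "ifconfig em0
             ** -vlanhwfilter" (assuming the stock ifconfig(8)
             ** capability options).
             */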
3024
3025 #ifdef DEVICE_POLLING
3026         ifp->if_capabilities |= IFCAP_POLLING;
3027 #endif
3028
3029         /* Enable only WOL MAGIC by default */
3030         if (adapter->wol) {
3031                 ifp->if_capabilities |= IFCAP_WOL;
3032                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3033         }
3034                 
3035         /*
3036          * Specify the media types supported by this adapter and register
3037          * callbacks to update media and link information
3038          */
3039         ifmedia_init(&adapter->media, IFM_IMASK,
3040             em_media_change, em_media_status);
3041         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3042             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3043                 u_char fiber_type = IFM_1000_SX;        /* default type */
3044
3045                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3046                             0, NULL);
3047                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3048         } else {
3049                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3050                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3051                             0, NULL);
3052                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3053                             0, NULL);
3054                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3055                             0, NULL);
3056                 if (adapter->hw.phy.type != e1000_phy_ife) {
3057                         ifmedia_add(&adapter->media,
3058                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3059                         ifmedia_add(&adapter->media,
3060                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3061                 }
3062         }
3063         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3064         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3065         return (0);
3066 }
3067
3068
3069 /*
3070  * Manage DMA'able memory.
3071  */
3072 static void
3073 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3074 {
3075         if (error)
3076                 return;
3077         *(bus_addr_t *) arg = segs[0].ds_addr;
3078 }
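     /*
      * em_dmamap_cb() is the callback handed to bus_dmamap_load(): busdma
      * invokes it with the resolved segment list, and because the
      * descriptor-ring tags created below set nsegments == 1, the single
      * segment's bus address is simply copied out through the opaque arg.
      */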
3079
3080 static int
3081 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3082         struct em_dma_alloc *dma, int mapflags)
3083 {
3084         int error;
3085
3086         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3087                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3088                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3089                                 BUS_SPACE_MAXADDR,      /* highaddr */
3090                                 NULL, NULL,             /* filter, filterarg */
3091                                 size,                   /* maxsize */
3092                                 1,                      /* nsegments */
3093                                 size,                   /* maxsegsize */
3094                                 0,                      /* flags */
3095                                 NULL,                   /* lockfunc */
3096                                 NULL,                   /* lockarg */
3097                                 &dma->dma_tag);
3098         if (error) {
3099                 device_printf(adapter->dev,
3100                     "%s: bus_dma_tag_create failed: %d\n",
3101                     __func__, error);
3102                 goto fail_0;
3103         }
3104
3105         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3106             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3107         if (error) {
3108                 device_printf(adapter->dev,
3109                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3110                     __func__, (uintmax_t)size, error);
3111                 goto fail_2;
3112         }
3113
3114         dma->dma_paddr = 0;
3115         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3116             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3117         if (error || dma->dma_paddr == 0) {
3118                 device_printf(adapter->dev,
3119                     "%s: bus_dmamap_load failed: %d\n",
3120                     __func__, error);
3121                 goto fail_3;
3122         }
3123
3124         return (0);
3125
3126 fail_3:
3127         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3128 fail_2:
3129         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3130         bus_dma_tag_destroy(dma->dma_tag);
3131 fail_0:
3132         dma->dma_map = NULL;
3133         dma->dma_tag = NULL;
3134
3135         return (error);
3136 }
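     /*
      * A typical call, as the ring setup below does it (tsize being the
      * rounded-up size of the descriptor area):
      *
      *	if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) {
      *		device_printf(dev, "Unable to allocate TX Descriptor memory\n");
      *		...
      *	}
      *
      * On success, dma_vaddr and dma_paddr hold the KVA and bus address of
      * one wired, contiguous region; em_dma_free() reverses the load,
      * allocation and tag-creation steps in the opposite order.
      */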
3137
3138 static void
3139 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3140 {
3141         if (dma->dma_tag == NULL)
3142                 return;
3143         if (dma->dma_map != NULL) {
3144                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3145                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3146                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3147                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3148                 dma->dma_map = NULL;
3149         }
3150         bus_dma_tag_destroy(dma->dma_tag);
3151         dma->dma_tag = NULL;
3152 }
3153
3154
3155 /*********************************************************************
3156  *
3157  *  Allocate memory for the transmit and receive rings, and then
3158  *  the descriptors associated with each, called only once at attach.
3159  *
3160  **********************************************************************/
3161 static int
3162 em_allocate_queues(struct adapter *adapter)
3163 {
3164         device_t                dev = adapter->dev;
3165         struct tx_ring          *txr = NULL;
3166         struct rx_ring          *rxr = NULL;
3167         int rsize, tsize, error = E1000_SUCCESS;
3168         int txconf = 0, rxconf = 0;
3169
3170
3171         /* Allocate the TX ring struct memory */
3172         if (!(adapter->tx_rings =
3173             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3174             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3175                 device_printf(dev, "Unable to allocate TX ring memory\n");
3176                 error = ENOMEM;
3177                 goto fail;
3178         }
3179
3180         /* Now allocate the RX */
3181         if (!(adapter->rx_rings =
3182             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3183             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3184                 device_printf(dev, "Unable to allocate RX ring memory\n");
3185                 error = ENOMEM;
3186                 goto rx_fail;
3187         }
3188
3189         tsize = roundup2(adapter->num_tx_desc *
3190             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3191         /*
3192          * Now set up the TX queues, txconf is needed to handle the
3193          * possibility that things fail midcourse and we need to
3194          * undo memory gracefully
3195          */ 
3196         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3197                 /* Set up some basics */
3198                 txr = &adapter->tx_rings[i];
3199                 txr->adapter = adapter;
3200                 txr->me = i;
3201
3202                 /* Initialize the TX lock */
3203                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3204                     device_get_nameunit(dev), txr->me);
3205                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3206
3207                 if (em_dma_malloc(adapter, tsize,
3208                         &txr->txdma, BUS_DMA_NOWAIT)) {
3209                         device_printf(dev,
3210                             "Unable to allocate TX Descriptor memory\n");
3211                         error = ENOMEM;
3212                         goto err_tx_desc;
3213                 }
3214                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3215                 bzero((void *)txr->tx_base, tsize);
3216
3217                 if (em_allocate_transmit_buffers(txr)) {
3218                         device_printf(dev,
3219                             "Critical Failure setting up transmit buffers\n");
3220                         error = ENOMEM;
3221                         goto err_tx_desc;
3222                 }
3223 #if __FreeBSD_version >= 800000
3224                 /* Allocate a buf ring */
3225                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3226                     M_WAITOK, &txr->tx_mtx);
3227 #endif
3228         }
3229
3230         /*
3231          * Next the RX queues...
3232          */ 
3233         rsize = roundup2(adapter->num_rx_desc *
3234             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3235         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3236                 rxr = &adapter->rx_rings[i];
3237                 rxr->adapter = adapter;
3238                 rxr->me = i;
3239
3240                 /* Initialize the RX lock */
3241                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3242                     device_get_nameunit(dev), rxr->me);
3243                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3244
3245                 if (em_dma_malloc(adapter, rsize,
3246                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3247                         device_printf(dev,
3248                             "Unable to allocate RX Descriptor memory\n");
3249                         error = ENOMEM;
3250                         goto err_rx_desc;
3251                 }
3252                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3253                 bzero((void *)rxr->rx_base, rsize);
3254
3255                 /* Allocate receive buffers for the ring*/
3256                 if (em_allocate_receive_buffers(rxr)) {
3257                         device_printf(dev,
3258                             "Critical Failure setting up receive buffers\n");
3259                         error = ENOMEM;
3260                         goto err_rx_desc;
3261                 }
3262         }
3263
3264         return (0);
3265
3266 err_rx_desc:
3267         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3268                 em_dma_free(adapter, &rxr->rxdma);
3269 err_tx_desc:
3270         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3271                 em_dma_free(adapter, &txr->txdma);
3272         free(adapter->rx_rings, M_DEVBUF);
3273 rx_fail:
3274 #if __FreeBSD_version >= 800000
             /* txr may be NULL if the RX ring allocation failed first */
             if (txr != NULL && txr->br != NULL)
3275                 buf_ring_free(txr->br, M_DEVBUF);
3276 #endif
3277         free(adapter->tx_rings, M_DEVBUF);
3278 fail:
3279         return (error);
3280 }
3281
3282
3283 /*********************************************************************
3284  *
3285  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3286  *  the information needed to transmit a packet on the wire. This is
3287  *  called only once at attach, setup is done every reset.
3288  *
3289  **********************************************************************/
3290 static int
3291 em_allocate_transmit_buffers(struct tx_ring *txr)
3292 {
3293         struct adapter *adapter = txr->adapter;
3294         device_t dev = adapter->dev;
3295         struct em_buffer *txbuf;
3296         int error, i;
3297
3298         /*
3299          * Setup DMA descriptor areas.
3300          */
3301         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3302                                1, 0,                    /* alignment, bounds */
3303                                BUS_SPACE_MAXADDR,       /* lowaddr */
3304                                BUS_SPACE_MAXADDR,       /* highaddr */
3305                                NULL, NULL,              /* filter, filterarg */
3306                                EM_TSO_SIZE,             /* maxsize */
3307                                EM_MAX_SCATTER,          /* nsegments */
3308                                PAGE_SIZE,               /* maxsegsize */
3309                                0,                       /* flags */
3310                                NULL,                    /* lockfunc */
3311                                NULL,                    /* lockfuncarg */
3312                                &txr->txtag))) {
3313                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3314                 goto fail;
3315         }
3316
3317         if (!(txr->tx_buffers =
3318             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3319             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3321                 error = ENOMEM;
3322                 goto fail;
3323         }
3324
3325         /* Create the descriptor buffer dma maps */
3326         txbuf = txr->tx_buffers;
3327         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3328                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3329                 if (error != 0) {
3330                         device_printf(dev, "Unable to create TX DMA map\n");
3331                         goto fail;
3332                 }
3333         }
3334
3335         return 0;
3336 fail:
3337         /* Free everything; this handles the case where we failed partway through */
3338         em_free_transmit_structures(adapter);
3339         return (error);
3340 }
3341
3342 /*********************************************************************
3343  *
3344  *  Initialize a transmit ring.
3345  *
3346  **********************************************************************/
3347 static void
3348 em_setup_transmit_ring(struct tx_ring *txr)
3349 {
3350         struct adapter *adapter = txr->adapter;
3351         struct em_buffer *txbuf;
3352         int i;
3353 #ifdef DEV_NETMAP
3354         struct netmap_adapter *na = NA(adapter->ifp);
3355         struct netmap_slot *slot;
3356 #endif /* DEV_NETMAP */
3357
3358         /* Clear the old descriptor contents */
3359         EM_TX_LOCK(txr);
3360 #ifdef DEV_NETMAP
3361         slot = netmap_reset(na, NR_TX, txr->me, 0);
3362 #endif /* DEV_NETMAP */
3363
3364         bzero((void *)txr->tx_base,
3365               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3366         /* Reset indices */
3367         txr->next_avail_desc = 0;
3368         txr->next_to_clean = 0;
3369
3370         /* Free any existing tx buffers. */
3371         txbuf = txr->tx_buffers;
3372         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3373                 if (txbuf->m_head != NULL) {
3374                         bus_dmamap_sync(txr->txtag, txbuf->map,
3375                             BUS_DMASYNC_POSTWRITE);
3376                         bus_dmamap_unload(txr->txtag, txbuf->map);
3377                         m_freem(txbuf->m_head);
3378                         txbuf->m_head = NULL;
3379                 }
3380 #ifdef DEV_NETMAP
3381                 if (slot) {
3382                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3383                         uint64_t paddr;
3384                         void *addr;
3385
3386                         addr = PNMB(slot + si, &paddr);
3387                         txr->tx_base[i].buffer_addr = htole64(paddr);
3388                         /* reload the map for netmap mode */
3389                         netmap_load_map(txr->txtag, txbuf->map, addr);
3390                 }
3391 #endif /* DEV_NETMAP */
3392
3393                 /* clear the watch index */
3394                 txbuf->next_eop = -1;
3395         }
3396
3397         /* Set number of descriptors available */
3398         txr->tx_avail = adapter->num_tx_desc;
3399         txr->queue_status = EM_QUEUE_IDLE;
3400
3401         /* Clear checksum offload context. */
3402         txr->last_hw_offload = 0;
3403         txr->last_hw_ipcss = 0;
3404         txr->last_hw_ipcso = 0;
3405         txr->last_hw_tucss = 0;
3406         txr->last_hw_tucso = 0;
3407
3408         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3409             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3410         EM_TX_UNLOCK(txr);
3411 }
3412
3413 /*********************************************************************
3414  *
3415  *  Initialize all transmit rings.
3416  *
3417  **********************************************************************/
3418 static void
3419 em_setup_transmit_structures(struct adapter *adapter)
3420 {
3421         struct tx_ring *txr = adapter->tx_rings;
3422
3423         for (int i = 0; i < adapter->num_queues; i++, txr++)
3424                 em_setup_transmit_ring(txr);
3425
3426         return;
3427 }
3428
3429 /*********************************************************************
3430  *
3431  *  Enable transmit unit.
3432  *
3433  **********************************************************************/
3434 static void
3435 em_initialize_transmit_unit(struct adapter *adapter)
3436 {
3437         struct tx_ring  *txr = adapter->tx_rings;
3438         struct e1000_hw *hw = &adapter->hw;
3439         u32     tctl, tarc, tipg = 0;
3440
3441         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3442
3443         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3444                 u64 bus_addr = txr->txdma.dma_paddr;
3445                 /* Base and Len of TX Ring */
3446                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3447                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3448                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3449                     (u32)(bus_addr >> 32));
3450                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3451                     (u32)bus_addr);
3452                 /* Init the HEAD/TAIL indices */
3453                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3454                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3455
3456                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3457                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3458                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3459
3460                 txr->queue_status = EM_QUEUE_IDLE;
3461         }
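             /*
             ** Each ring's 64-bit descriptor base address is split across
             ** TDBAH/TDBAL above; e.g. a bus_addr of 0x123456000 programs
             ** TDBAH = 0x1 and TDBAL = 0x23456000.
             */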
3462
3463         /* Set the default values for the Tx Inter Packet Gap timer */
3464         switch (adapter->hw.mac.type) {
3465         case e1000_80003es2lan:
3466                 tipg = DEFAULT_82543_TIPG_IPGR1;
3467                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3468                     E1000_TIPG_IPGR2_SHIFT;
3469                 break;
3470         default:
3471                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3472                     (adapter->hw.phy.media_type ==
3473                     e1000_media_type_internal_serdes))
3474                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3475                 else
3476                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3477                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3478                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3479         }
3480
3481         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3482         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3483
3484         if (adapter->hw.mac.type >= e1000_82540)
3485                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3486                     adapter->tx_abs_int_delay.value);
3487
3488         if ((adapter->hw.mac.type == e1000_82571) ||
3489             (adapter->hw.mac.type == e1000_82572)) {
3490                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3491                 tarc |= SPEED_MODE_BIT;
3492                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3493         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3494                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3495                 tarc |= 1;
3496                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3497                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3498                 tarc |= 1;
3499                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3500         }
3501
3502         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3503         if (adapter->tx_int_delay.value > 0)
3504                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3505
3506         /* Program the Transmit Control Register */
3507         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3508         tctl &= ~E1000_TCTL_CT;
3509         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3510                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3511
3512         if (adapter->hw.mac.type >= e1000_82571)
3513                 tctl |= E1000_TCTL_MULR;
3514
3515         /* This write will effectively turn on the transmit unit. */
3516         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3517
3518 }
3519
3520
3521 /*********************************************************************
3522  *
3523  *  Free all transmit rings.
3524  *
3525  **********************************************************************/
3526 static void
3527 em_free_transmit_structures(struct adapter *adapter)
3528 {
3529         struct tx_ring *txr = adapter->tx_rings;
3530
3531         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3532                 EM_TX_LOCK(txr);
3533                 em_free_transmit_buffers(txr);
3534                 em_dma_free(adapter, &txr->txdma);
3535                 EM_TX_UNLOCK(txr);
3536                 EM_TX_LOCK_DESTROY(txr);
3537         }
3538
3539         free(adapter->tx_rings, M_DEVBUF);
3540 }
3541
3542 /*********************************************************************
3543  *
3544  *  Free transmit ring related data structures.
3545  *
3546  **********************************************************************/
3547 static void
3548 em_free_transmit_buffers(struct tx_ring *txr)
3549 {
3550         struct adapter          *adapter = txr->adapter;
3551         struct em_buffer        *txbuf;
3552
3553         INIT_DEBUGOUT("free_transmit_ring: begin");
3554
3555         if (txr->tx_buffers == NULL)
3556                 return;
3557
3558         for (int i = 0; i < adapter->num_tx_desc; i++) {
3559                 txbuf = &txr->tx_buffers[i];
3560                 if (txbuf->m_head != NULL) {
3561                         bus_dmamap_sync(txr->txtag, txbuf->map,
3562                             BUS_DMASYNC_POSTWRITE);
3563                         bus_dmamap_unload(txr->txtag,
3564                             txbuf->map);
3565                         m_freem(txbuf->m_head);
3566                         txbuf->m_head = NULL;
3567                         if (txbuf->map != NULL) {
3568                                 bus_dmamap_destroy(txr->txtag,
3569                                     txbuf->map);
3570                                 txbuf->map = NULL;
3571                         }
3572                 } else if (txbuf->map != NULL) {
3573                         bus_dmamap_unload(txr->txtag,
3574                             txbuf->map);
3575                         bus_dmamap_destroy(txr->txtag,
3576                             txbuf->map);
3577                         txbuf->map = NULL;
3578                 }
3579         }
3580 #if __FreeBSD_version >= 800000
3581         if (txr->br != NULL)
3582                 buf_ring_free(txr->br, M_DEVBUF);
3583 #endif
3584         if (txr->tx_buffers != NULL) {
3585                 free(txr->tx_buffers, M_DEVBUF);
3586                 txr->tx_buffers = NULL;
3587         }
3588         if (txr->txtag != NULL) {
3589                 bus_dma_tag_destroy(txr->txtag);
3590                 txr->txtag = NULL;
3591         }
3592         return;
3593 }
3594
3595
3596 /*********************************************************************
3597  *  The offload context is protocol specific (TCP/UDP) and thus
3598  *  only needs to be set when the protocol changes. A context
3599  *  change is a performance detriment, so when a workload forces
3600  *  frequent changes it may be better disabled. The reason arises in the way
3601  *  in which the controller supports pipelined requests from the
3602  *  Tx data DMA. Up to four requests can be pipelined, and they may
3603  *  belong to the same packet or to multiple packets. However all
3604  *  requests for one packet are issued before a request is issued
3605  *  for a subsequent packet and if a request for the next packet
3606  *  requires a context change, that request will be stalled
3607  *  until the previous request completes. This means setting up
3608  *  a new context effectively disables pipelined Tx data DMA which
3609  *  in turn greatly slows down performance when sending small
3610  *  frames.
3611  **********************************************************************/
3612 static void
3613 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3614     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3615 {
3616         struct adapter                  *adapter = txr->adapter;
3617         struct e1000_context_desc       *TXD = NULL;
3618         struct em_buffer                *tx_buffer;
3619         int                             cur, hdr_len;
3620         u32                             cmd = 0;
3621         u16                             offload = 0;
3622         u8                              ipcso, ipcss, tucso, tucss;
3623
3624         ipcss = ipcso = tucss = tucso = 0;
3625         hdr_len = ip_off + (ip->ip_hl << 2);
3626         cur = txr->next_avail_desc;
3627
3628         /* Setup of IP header checksum. */
3629         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3630                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3631                 offload |= CSUM_IP;
3632                 ipcss = ip_off;
3633                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3634                 /*
3635                  * Start offset for header checksum calculation.
3636                  * End offset for header checksum calculation.
3637                  * Offset of place to put the checksum.
3638                  */
3639                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3640                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3641                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3642                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3643                 cmd |= E1000_TXD_CMD_IP;
3644         }
3645
3646         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3647                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3648                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3649                 offload |= CSUM_TCP;
3650                 tucss = hdr_len;
3651                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3652                 /*
3653                  * Setting up a new checksum offload context for every frame
3654                  * takes a lot of processing time in hardware. It also
3655                  * hurts performance badly for small frames, so avoid it
3656                  * when the driver can reuse the previously configured
3657                  * checksum offload context.
3658                  */
3659                 if (txr->last_hw_offload == offload) {
3660                         if (offload & CSUM_IP) {
3661                                 if (txr->last_hw_ipcss == ipcss &&
3662                                     txr->last_hw_ipcso == ipcso &&
3663                                     txr->last_hw_tucss == tucss &&
3664                                     txr->last_hw_tucso == tucso)
3665                                         return;
3666                         } else {
3667                                 if (txr->last_hw_tucss == tucss &&
3668                                     txr->last_hw_tucso == tucso)
3669                                         return;
3670                         }
3671                 }
3672                 txr->last_hw_offload = offload;
3673                 txr->last_hw_tucss = tucss;
3674                 txr->last_hw_tucso = tucso;
3675                 /*
3676                  * Start offset for payload checksum calculation.
3677                  * End offset for payload checksum calculation.
3678                  * Offset of place to put the checksum.
3679                  */
3680                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3681                 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3682                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3683                 TXD->upper_setup.tcp_fields.tucso = tucso;
3684                 cmd |= E1000_TXD_CMD_TCP;
3685         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3686                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3687                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3688                 tucss = hdr_len;
3689                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3690                 /*
3691                  * Setting up a new checksum offload context for every frame
3692                  * takes a lot of processing time in hardware. It also
3693                  * hurts performance badly for small frames, so avoid it
3694                  * when the driver can reuse the previously configured
3695                  * checksum offload context.
3696                  */
3697                 if (txr->last_hw_offload == offload) {
3698                         if (offload & CSUM_IP) {
3699                                 if (txr->last_hw_ipcss == ipcss &&
3700                                     txr->last_hw_ipcso == ipcso &&
3701                                     txr->last_hw_tucss == tucss &&
3702                                     txr->last_hw_tucso == tucso)
3703                                         return;
3704                         } else {
3705                                 if (txr->last_hw_tucss == tucss &&
3706                                     txr->last_hw_tucso == tucso)
3707                                         return;
3708                         }
3709                 }
3710                 txr->last_hw_offload = offload;
3711                 txr->last_hw_tucss = tucss;
3712                 txr->last_hw_tucso = tucso;
3713                 /*
3714                  * Start offset for header checksum calculation.
3715                  * End offset for header checksum calculation.
3716                  * Offset of place to put the checksum.
3717                  */
3718                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3719                 TXD->upper_setup.tcp_fields.tucss = tucss;
3720                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3721                 TXD->upper_setup.tcp_fields.tucso = tucso;
3722         }
3723   
3724         if (offload & CSUM_IP) {
3725                 txr->last_hw_ipcss = ipcss;
3726                 txr->last_hw_ipcso = ipcso;
3727         }
3728
3729         TXD->tcp_seg_setup.data = htole32(0);
3730         TXD->cmd_and_length =
3731             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3732         tx_buffer = &txr->tx_buffers[cur];
3733         tx_buffer->m_head = NULL;
3734         tx_buffer->next_eop = -1;
3735
3736         if (++cur == adapter->num_tx_desc)
3737                 cur = 0;
3738
3739         txr->tx_avail--;
3740         txr->next_avail_desc = cur;
3741 }
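     /*
      * Note that the context descriptor itself occupies one slot in the
      * TX ring (hence the tx_avail decrement and next_avail_desc advance
      * just above), so callers must budget one descriptor beyond those
      * needed for the packet data whenever a new context is written.
      */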
3742
3743
3744 /**********************************************************************
3745  *
3746  *  Setup work for hardware segmentation offload (TSO)
3747  *
3748  **********************************************************************/
3749 static void
3750 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3751     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3752 {
3753         struct adapter                  *adapter = txr->adapter;
3754         struct e1000_context_desc       *TXD;
3755         struct em_buffer                *tx_buffer;
3756         int cur, hdr_len;
3757
3758         /*
3759          * In theory we can use the same TSO context if and only if
3760          * frame is the same type (IP/TCP) and has the same MSS. However,
3761          * checking whether a frame has the same IP/TCP structure is a
3762          * hard thing, so just ignore that and always re-establish a
3763          * new TSO context.
3764          */
3765         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3766         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3767                       E1000_TXD_DTYP_D |        /* Data descr type */
3768                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3769
3770         /* IP and/or TCP header checksum calculation and insertion. */
3771         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3772
3773         cur = txr->next_avail_desc;
3774         tx_buffer = &txr->tx_buffers[cur];
3775         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3776
3777         /*
3778          * Start offset for header checksum calculation.
3779          * End offset for header checksum calculation.
3780          * Offset of the place to put the checksum.
3781          */
3782         TXD->lower_setup.ip_fields.ipcss = ip_off;
3783         TXD->lower_setup.ip_fields.ipcse =
3784             htole16(ip_off + (ip->ip_hl << 2) - 1);
3785         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3786         /*
3787          * Start offset for payload checksum calculation.
3788          * End offset for payload checksum calculation.
3789          * Offset of place to put the checksum.
3790          */
3791         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3792         TXD->upper_setup.tcp_fields.tucse = 0;
3793         TXD->upper_setup.tcp_fields.tucso =
3794             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3795         /*
3796          * Payload size per packet w/o any headers.
3797          * Length of all headers up to payload.
3798          */
3799         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3800         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3801
3802         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3803                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3804                                 E1000_TXD_CMD_TSE |     /* TSE context */
3805                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3806                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3807                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3808
3809         tx_buffer->m_head = NULL;
3810         tx_buffer->next_eop = -1;
3811
3812         if (++cur == adapter->num_tx_desc)
3813                 cur = 0;
3814
3815         txr->tx_avail--;
3816         txr->next_avail_desc = cur;
3817         txr->tx_tso = TRUE;
3818 }
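     /*
      * Worked example (a sketch): for a TSO'd TCP/IPv4 packet with a plain
      * Ethernet header (ip_off = 14) and no IP or TCP options (ip_hl = 5,
      * th_off = 5), hdr_len = 14 + 20 + 20 = 54; the context descriptor
      * then carries mss = m_pkthdr.tso_segsz, and cmd_and_length counts
      * the m_pkthdr.len - 54 payload bytes to be segmented.
      */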
3819
3820
3821 /**********************************************************************
3822  *
3823  *  Examine each tx_buffer in the used queue. If the hardware is done
3824  *  processing the packet then free associated resources. The
3825  *  tx_buffer is put back on the free queue.
3826  *
3827  **********************************************************************/
3828 static void
3829 em_txeof(struct tx_ring *txr)
3830 {
3831         struct adapter  *adapter = txr->adapter;
3832         int first, last, done, processed;
3833         struct em_buffer *tx_buffer;
3834         struct e1000_tx_desc   *tx_desc, *eop_desc;
3835         struct ifnet   *ifp = adapter->ifp;
3836
3837         EM_TX_LOCK_ASSERT(txr);
3838 #ifdef DEV_NETMAP
3839         if (netmap_tx_irq(ifp, txr->me |
3840             (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3841                 return;
3842 #endif /* DEV_NETMAP */
3843
3844         /* No work, make sure watchdog is off */
3845         if (txr->tx_avail == adapter->num_tx_desc) {
3846                 txr->queue_status = EM_QUEUE_IDLE;
3847                 return;
3848         }
3849
3850         processed = 0;
3851         first = txr->next_to_clean;
3852         tx_desc = &txr->tx_base[first];
3853         tx_buffer = &txr->tx_buffers[first];
3854         last = tx_buffer->next_eop;
3855         eop_desc = &txr->tx_base[last];
3856
3857         /*
3858          * What this does is get the index of the
3859          * first descriptor AFTER the EOP of the 
3860          * first packet, that way we can do the
3861          * simple comparison on the inner while loop.
3862          */
3863         if (++last == adapter->num_tx_desc)
3864                 last = 0;
3865         done = last;
3866
3867         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3868             BUS_DMASYNC_POSTREAD);
3869
3870         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3871                 /* We clean the range of the packet */
3872                 while (first != done) {
3873                         tx_desc->upper.data = 0;
3874                         tx_desc->lower.data = 0;
3875                         tx_desc->buffer_addr = 0;
3876                         ++txr->tx_avail;
3877                         ++processed;
3878
3879                         if (tx_buffer->m_head) {
3880                                 bus_dmamap_sync(txr->txtag,
3881                                     tx_buffer->map,
3882                                     BUS_DMASYNC_POSTWRITE);
3883                                 bus_dmamap_unload(txr->txtag,
3884                                     tx_buffer->map);
3885                                 m_freem(tx_buffer->m_head);
3886                                 tx_buffer->m_head = NULL;
3887                         }
3888                         tx_buffer->next_eop = -1;
3889                         txr->watchdog_time = ticks;
3890
3891                         if (++first == adapter->num_tx_desc)
3892                                 first = 0;
3893
3894                         tx_buffer = &txr->tx_buffers[first];
3895                         tx_desc = &txr->tx_base[first];
3896                 }
3897                 ++ifp->if_opackets;
3898                 /* See if we can continue to the next packet */
3899                 last = tx_buffer->next_eop;
3900                 if (last != -1) {
3901                         eop_desc = &txr->tx_base[last];
3902                         /* Get new done point */
3903                         if (++last == adapter->num_tx_desc) last = 0;
3904                         done = last;
3905                 } else
3906                         break;
3907         }
3908         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3909             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3910
3911         txr->next_to_clean = first;
3912
3913         /*
3914         ** Watchdog calculation: we know there's
3915         ** work outstanding or the first return
3916         ** would have been taken, so nothing processed
3917         ** for too long indicates a hang; the local timer
3918         ** will examine this and do a reset if needed.
3919         */
3920         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3921                 txr->queue_status = EM_QUEUE_HUNG;
3922
3923         /*
3924          * If we have a minimum free, clear IFF_DRV_OACTIVE
3925          * to tell the stack that it is OK to send packets.
3926          * Notice that all writes of OACTIVE happen under the
3927          * TX lock which, with a single queue, guarantees 
3928          * sanity.
3929          */
3930         if (txr->tx_avail >= EM_MAX_SCATTER)
3931                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3932
3933         /* Disable watchdog if all clean */
3934         if (txr->tx_avail == adapter->num_tx_desc) {
3935                 txr->queue_status = EM_QUEUE_IDLE;
3936         } 
3937 }
3938
3939
3940 /*********************************************************************
3941  *
3942  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3943  *
3944  **********************************************************************/
3945 static void
3946 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3947 {
3948         struct adapter          *adapter = rxr->adapter;
3949         struct mbuf             *m;
3950         bus_dma_segment_t       segs[1];
3951         struct em_buffer        *rxbuf;
3952         int                     i, j, error, nsegs;
3953         bool                    cleaned = FALSE;
3954
3955         i = j = rxr->next_to_refresh;
3956         /*
3957         ** Get one descriptor beyond
3958         ** our work mark to control
3959         ** the loop.
3960         */
3961         if (++j == adapter->num_rx_desc)
3962                 j = 0;
3963
3964         while (j != limit) {
3965                 rxbuf = &rxr->rx_buffers[i];
3966                 if (rxbuf->m_head == NULL) {
3967                         m = m_getjcl(M_NOWAIT, MT_DATA,
3968                             M_PKTHDR, adapter->rx_mbuf_sz);
3969                         /*
3970                         ** If we have a temporary resource shortage
3971                         ** that causes a failure, just abort refresh
3972                         ** for now, we will return to this point when
3973                         ** reinvoked from em_rxeof.
3974                         */
3975                         if (m == NULL)
3976                                 goto update;
3977                 } else
3978                         m = rxbuf->m_head;
3979
3980                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3981                 m->m_flags |= M_PKTHDR;
3982                 m->m_data = m->m_ext.ext_buf;
3983
3984                 /* Use bus_dma machinery to setup the memory mapping  */
3985                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3986                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3987                 if (error != 0) {
3988                         device_printf(adapter->dev, "Refresh mbufs: "
3989                             "dmamap load failure - %d\n", error);
3990                         m_free(m);
3991                         rxbuf->m_head = NULL;
3992                         goto update;
3993                 }
3994                 rxbuf->m_head = m;
3995                 bus_dmamap_sync(rxr->rxtag,
3996                     rxbuf->map, BUS_DMASYNC_PREREAD);
3997                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3998                 cleaned = TRUE;
3999
4000                 i = j; /* Next is precalculated for us */
4001                 rxr->next_to_refresh = i;
4002                 /* Calculate next controlling index */
4003                 if (++j == adapter->num_rx_desc)
4004                         j = 0;
4005         }
4006 update:
4007         /*
4008         ** Update the tail pointer only if,
4009         ** and as far as we have refreshed.
4010         */
4011         if (cleaned)
4012                 E1000_WRITE_REG(&adapter->hw,
4013                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4014
4015         return;
4016 }
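     /*
      * The two-index walk above (i trails j by one slot) deliberately
      * stops refreshing one descriptor short of limit, presumably so the
      * RDT tail written back never runs onto the hardware head; e.g. with
      * next_to_refresh == 0 and limit == 3, slots 0 and 1 are refilled
      * and RDT is left at 2.
      */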
4017
4018
4019 /*********************************************************************
4020  *
4021  *  Allocate memory for rx_buffer structures. Since we use one
4022  *  rx_buffer per received packet, the maximum number of rx_buffer's
4023  *  that we'll need is equal to the number of receive descriptors
4024  *  that we've allocated.
4025  *
4026  **********************************************************************/
4027 static int
4028 em_allocate_receive_buffers(struct rx_ring *rxr)
4029 {
4030         struct adapter          *adapter = rxr->adapter;
4031         device_t                dev = adapter->dev;
4032         struct em_buffer        *rxbuf;
4033         int                     error;
4034
4035         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4036             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4037         if (rxr->rx_buffers == NULL) {
4038                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4039                 return (ENOMEM);
4040         }
4041
4042         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4043                                 1, 0,                   /* alignment, bounds */
4044                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4045                                 BUS_SPACE_MAXADDR,      /* highaddr */
4046                                 NULL, NULL,             /* filter, filterarg */
4047                                 MJUM9BYTES,             /* maxsize */
4048                                 1,                      /* nsegments */
4049                                 MJUM9BYTES,             /* maxsegsize */
4050                                 0,                      /* flags */
4051                                 NULL,                   /* lockfunc */
4052                                 NULL,                   /* lockarg */
4053                                 &rxr->rxtag);
4054         if (error) {
4055                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4056                     __func__, error);
4057                 goto fail;
4058         }
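        /*
         * Note: a single MJUM9BYTES segment suffices because
         * rx_mbuf_sz never exceeds a 9K jumbo cluster, the largest
         * buffer this driver requests from m_getjcl().
         */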
4059
4060         /* Create a DMA map for each descriptor's buffer */
4061         for (int i = 0; i < adapter->num_rx_desc; i++) {
4062                 rxbuf = &rxr->rx_buffers[i];
4063                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4064                     &rxbuf->map);
4065                 if (error) {
4066                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4067                             __func__, error);
4068                         goto fail;
4069                 }
4070         }
4071
4072         return (0);
4073
4074 fail:
4075         em_free_receive_structures(adapter);
4076         return (error);
4077 }
4078
4079
4080 /*********************************************************************
4081  *
4082  *  Initialize a receive ring and its buffers.
4083  *
4084  **********************************************************************/
4085 static int
4086 em_setup_receive_ring(struct rx_ring *rxr)
4087 {
4088         struct  adapter         *adapter = rxr->adapter;
4089         struct em_buffer        *rxbuf;
4090         bus_dma_segment_t       seg[1];
4091         int                     rsize, nsegs, error = 0;
4092 #ifdef DEV_NETMAP
4093         struct netmap_adapter *na = NA(adapter->ifp);
4094         struct netmap_slot *slot;
4095 #endif
4096
4097
4098         /* Clear the ring contents */
4099         EM_RX_LOCK(rxr);
4100         rsize = roundup2(adapter->num_rx_desc *
4101             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4102         bzero((void *)rxr->rx_base, rsize);
4103 #ifdef DEV_NETMAP
4104         slot = netmap_reset(na, NR_RX, 0, 0);
4105 #endif
4106
4107         /*
4108         ** Free current RX buffer structs and their mbufs
4109         */
4110         for (int i = 0; i < adapter->num_rx_desc; i++) {
4111                 rxbuf = &rxr->rx_buffers[i];
4112                 if (rxbuf->m_head != NULL) {
4113                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4114                             BUS_DMASYNC_POSTREAD);
4115                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4116                         m_freem(rxbuf->m_head);
4117                         rxbuf->m_head = NULL; /* mark as freed */
4118                 }
4119         }
4120
4121         /* Now replenish the mbufs */
4122         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4123                 rxbuf = &rxr->rx_buffers[j];
4124 #ifdef DEV_NETMAP
4125                 if (slot) {
4126                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4127                         uint64_t paddr;
4128                         void *addr;
4129
4130                         addr = PNMB(slot + si, &paddr);
4131                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4132                         /* Update descriptor */
4133                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4134                         continue;
4135                 }
4136 #endif /* DEV_NETMAP */
4137                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4138                     M_PKTHDR, adapter->rx_mbuf_sz);
4139                 if (rxbuf->m_head == NULL) {
4140                         error = ENOBUFS;
4141                         goto fail;
4142                 }
4143                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4144                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4145                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4146
4147                 /* Get the memory mapping */
4148                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4149                     rxbuf->map, rxbuf->m_head, seg,
4150                     &nsegs, BUS_DMA_NOWAIT);
4151                 if (error != 0) {
4152                         m_freem(rxbuf->m_head);
4153                         rxbuf->m_head = NULL;
4154                         goto fail;
4155                 }
4156                 bus_dmamap_sync(rxr->rxtag,
4157                     rxbuf->map, BUS_DMASYNC_PREREAD);
4158
4159                 /* Update descriptor */
4160                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4161         }
4162         rxr->next_to_check = 0;
4163         rxr->next_to_refresh = 0;
4164         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4165             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4166
4167 fail:
4168         EM_RX_UNLOCK(rxr);
4169         return (error);
4170 }
4171
4172 /*********************************************************************
4173  *
4174  *  Initialize all receive rings.
4175  *
4176  **********************************************************************/
4177 static int
4178 em_setup_receive_structures(struct adapter *adapter)
4179 {
4180         struct rx_ring *rxr = adapter->rx_rings;
4181         int q;
4182
4183         for (q = 0; q < adapter->num_queues; q++, rxr++)
4184                 if (em_setup_receive_ring(rxr))
4185                         goto fail;
4186
4187         return (0);
4188 fail:
4189         /*
4190          * Free RX buffers allocated so far; we handle only the
4191          * rings that completed, since the failing ring cleaned
4192          * up after itself. 'q' failed, so it is the terminus.
4193          */
4194         for (int i = 0; i < q; ++i) {
4195                 rxr = &adapter->rx_rings[i];
4196                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4197                         struct em_buffer *rxbuf;
4198                         rxbuf = &rxr->rx_buffers[n];
4199                         if (rxbuf->m_head != NULL) {
4200                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4201                                   BUS_DMASYNC_POSTREAD);
4202                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4203                                 m_freem(rxbuf->m_head);
4204                                 rxbuf->m_head = NULL;
4205                         }
4206                 }
4207                 rxr->next_to_check = 0;
4208                 rxr->next_to_refresh = 0;
4209         }
4210
4211         return (ENOBUFS);
4212 }
4213
4214 /*********************************************************************
4215  *
4216  *  Free all receive rings.
4217  *
4218  **********************************************************************/
4219 static void
4220 em_free_receive_structures(struct adapter *adapter)
4221 {
4222         struct rx_ring *rxr = adapter->rx_rings;
4223
4224         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4225                 em_free_receive_buffers(rxr);
4226                 /* Free the ring memory as well */
4227                 em_dma_free(adapter, &rxr->rxdma);
4228                 EM_RX_LOCK_DESTROY(rxr);
4229         }
4230
4231         free(adapter->rx_rings, M_DEVBUF);
4232 }
4233
4234
4235 /*********************************************************************
4236  *
4237  *  Free receive ring data structures
4238  *
4239  **********************************************************************/
4240 static void
4241 em_free_receive_buffers(struct rx_ring *rxr)
4242 {
4243         struct adapter          *adapter = rxr->adapter;
4244         struct em_buffer        *rxbuf = NULL;
4245
4246         INIT_DEBUGOUT("free_receive_buffers: begin");
4247
4248         if (rxr->rx_buffers != NULL) {
4249                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4250                         rxbuf = &rxr->rx_buffers[i];
4251                         if (rxbuf->map != NULL) {
4252                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4253                                     BUS_DMASYNC_POSTREAD);
4254                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4255                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4256                         }
4257                         if (rxbuf->m_head != NULL) {
4258                                 m_freem(rxbuf->m_head);
4259                                 rxbuf->m_head = NULL;
4260                         }
4261                 }
4262                 free(rxr->rx_buffers, M_DEVBUF);
4263                 rxr->rx_buffers = NULL;
4264                 rxr->next_to_check = 0;
4265                 rxr->next_to_refresh = 0;
4266         }
4267
4268         if (rxr->rxtag != NULL) {
4269                 bus_dma_tag_destroy(rxr->rxtag);
4270                 rxr->rxtag = NULL;
4271         }
4272
4273         return;
4274 }
4275
4276
4277 /*********************************************************************
4278  *
4279  *  Enable receive unit.
4280  *
4281  **********************************************************************/
4282
4283 static void
4284 em_initialize_receive_unit(struct adapter *adapter)
4285 {
4286         struct rx_ring  *rxr = adapter->rx_rings;
4287         struct ifnet    *ifp = adapter->ifp;
4288         struct e1000_hw *hw = &adapter->hw;
4289         u64     bus_addr;
4290         u32     rctl, rxcsum;
4291
4292         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4293
4294         /*
4295          * Make sure receives are disabled while setting
4296          * up the descriptor ring
4297          */
4298         rctl = E1000_READ_REG(hw, E1000_RCTL);
4299         /* Do not disable if ever enabled on this hardware */
4300         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4301                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4302
4303         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4304             adapter->rx_abs_int_delay.value);
4305         /*
4306          * Set the interrupt throttling rate. Value is calculated
4307          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4308          */
4309         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
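        /*
         * E.g. with the driver default of MAX_INTS_PER_SEC = 8000,
         * DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488 units of 256 ns,
         * i.e. roughly 125 us minimum spacing between interrupts.
         */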
4310
4311         /*
4312         ** When using MSIX interrupts we need to throttle
4313         ** using the EITR register (82574 only)
4314         */
4315         if (hw->mac.type == e1000_82574) {
4316                 for (int i = 0; i < 4; i++)
4317                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4318                             DEFAULT_ITR);
4319                 /* Disable accelerated acknowledge */
4320                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4321         }
4322
4323         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4324         if (ifp->if_capenable & IFCAP_RXCSUM)
4325                 rxcsum |= E1000_RXCSUM_TUOFL;
4326         else
4327                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4328         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4329
4330         /*
4331         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4332         ** long latencies are observed, like Lenovo X60. This
4333         ** change eliminates the problem, but since having positive
4334         ** values in RDTR is a known source of problems on other
4335         ** platforms another solution is being sought.
4336         */
4337         if (hw->mac.type == e1000_82573)
4338                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4339
4340         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4341                 /* Setup the Base and Length of the Rx Descriptor Ring */
4342                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4343
4344                 bus_addr = rxr->rxdma.dma_paddr;
4345                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4346                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4347                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4348                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4349                 /* Setup the Head and Tail Descriptor Pointers */
4350                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4351 #ifdef DEV_NETMAP
4352                 /*
4353                  * an init() while a netmap client is active must
4354                  * preserve the rx buffers passed to userspace.
4355                  */
4356                 if (ifp->if_capenable & IFCAP_NETMAP)
4357                         rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4358 #endif /* DEV_NETMAP */
4359                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4360         }
4361
4362         /* Set PTHRESH for improved jumbo performance */
4363         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4364             (adapter->hw.mac.type == e1000_pch2lan) ||
4365             (adapter->hw.mac.type == e1000_ich10lan)) &&
4366             (ifp->if_mtu > ETHERMTU)) {
4367                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4368                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4369         }
4370                 
4371         if (adapter->hw.mac.type >= e1000_pch2lan) {
4372                 if (ifp->if_mtu > ETHERMTU)
4373                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4374                 else
4375                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4376         }
4377
4378         /* Setup the Receive Control Register */
4379         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4380         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4381             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4382             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4383
4384         /* Strip the CRC */
4385         rctl |= E1000_RCTL_SECRC;
4386
4387         /* Make sure VLAN Filters are off */
4388         rctl &= ~E1000_RCTL_VFE;
4389         rctl &= ~E1000_RCTL_SBP;
4390
4391         if (adapter->rx_mbuf_sz == MCLBYTES)
4392                 rctl |= E1000_RCTL_SZ_2048;
4393         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4394                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4395         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4396                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4397
4398         if (ifp->if_mtu > ETHERMTU)
4399                 rctl |= E1000_RCTL_LPE;
4400         else
4401                 rctl &= ~E1000_RCTL_LPE;
4402
4403         /* Write out the settings */
4404         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4405
4406         return;
4407 }
4408
4409
4410 /*********************************************************************
4411  *
4412  *  This routine executes in interrupt context. It replenishes
4413  *  the mbufs in the descriptor ring and passes data which has
4414  *  been DMA'ed into host memory up to the upper layer.
4415  *
4416  *  We loop at most count times if count is > 0, or until done if
4417  *  count < 0.
4418  *
4419  *  For polling we also return the number of cleaned packets
4420  *********************************************************************/
4421 static bool
4422 em_rxeof(struct rx_ring *rxr, int count, int *done)
4423 {
4424         struct adapter          *adapter = rxr->adapter;
4425         struct ifnet            *ifp = adapter->ifp;
4426         struct mbuf             *mp, *sendmp;
4427         u8                      status = 0;
4428         u16                     len;
4429         int                     i, processed, rxdone = 0;
4430         bool                    eop;
4431         struct e1000_rx_desc    *cur;
4432
4433         EM_RX_LOCK(rxr);
4434
4435 #ifdef DEV_NETMAP
4436         if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4437                 return (FALSE);
4438 #endif /* DEV_NETMAP */
4439
4440         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4441
4442                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4443                         break;
4444
4445                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4446                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4447
4448                 cur = &rxr->rx_base[i];
4449                 status = cur->status;
4450                 mp = sendmp = NULL;
4451
4452                 if ((status & E1000_RXD_STAT_DD) == 0)
4453                         break;
4454
4455                 len = le16toh(cur->length);
4456                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4457
4458                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4459                     (rxr->discard == TRUE)) {
4460                         adapter->dropped_pkts++;
4461                         ++rxr->rx_discarded;
4462                         if (!eop) /* Catch subsequent segs */
4463                                 rxr->discard = TRUE;
4464                         else
4465                                 rxr->discard = FALSE;
4466                         em_rx_discard(rxr, i);
4467                         goto next_desc;
4468                 }
4469
4470                 /* Assign correct length to the current fragment */
4471                 mp = rxr->rx_buffers[i].m_head;
4472                 mp->m_len = len;
4473
4474                 /* Trigger for refresh */
4475                 rxr->rx_buffers[i].m_head = NULL;
4476
4477                 /* First segment? */
4478                 if (rxr->fmp == NULL) {
4479                         mp->m_pkthdr.len = len;
4480                         rxr->fmp = rxr->lmp = mp;
4481                 } else {
4482                         /* Chain mbuf's together */
4483                         mp->m_flags &= ~M_PKTHDR;
4484                         rxr->lmp->m_next = mp;
4485                         rxr->lmp = mp;
4486                         rxr->fmp->m_pkthdr.len += len;
4487                 }
4488
4489                 if (eop) {
4490                         --count;
4491                         sendmp = rxr->fmp;
4492                         sendmp->m_pkthdr.rcvif = ifp;
4493                         ifp->if_ipackets++;
4494                         em_receive_checksum(cur, sendmp);
4495 #ifndef __NO_STRICT_ALIGNMENT
4496                         if (adapter->hw.mac.max_frame_size >
4497                             (MCLBYTES - ETHER_ALIGN) &&
4498                             em_fixup_rx(rxr) != 0)
4499                                 goto skip;
4500 #endif
4501                         if (status & E1000_RXD_STAT_VP) {
4502                                 sendmp->m_pkthdr.ether_vtag =
4503                                     le16toh(cur->special);
4504                                 sendmp->m_flags |= M_VLANTAG;
4505                         }
4506 #ifndef __NO_STRICT_ALIGNMENT
4507 skip:
4508 #endif
4509                         rxr->fmp = rxr->lmp = NULL;
4510                 }
4511 next_desc:
4512                 /* Zero out the receive descriptors status. */
4513                 cur->status = 0;
4514                 ++rxdone;       /* cumulative for POLL */
4515                 ++processed;
4516
4517                 /* Advance our pointers to the next descriptor. */
4518                 if (++i == adapter->num_rx_desc)
4519                         i = 0;
4520
4521                 /* Send to the stack */
4522                 if (sendmp != NULL) {
4523                         rxr->next_to_check = i;
4524                         EM_RX_UNLOCK(rxr);
4525                         (*ifp->if_input)(ifp, sendmp);
4526                         EM_RX_LOCK(rxr);
4527                         i = rxr->next_to_check;
4528                 }
4529
4530                 /* Only refresh mbufs every 8 descriptors */
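                /*
                 * Batching the refresh amortizes the RDT tail
                 * register write in em_refresh_mbufs() over
                 * several descriptors instead of paying the
                 * MMIO cost on every one.
                 */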
4531                 if (processed == 8) {
4532                         em_refresh_mbufs(rxr, i);
4533                         processed = 0;
4534                 }
4535         }
4536
4537         /* Catch any remaining refresh work */
4538         if (e1000_rx_unrefreshed(rxr))
4539                 em_refresh_mbufs(rxr, i);
4540
4541         rxr->next_to_check = i;
4542         if (done != NULL)
4543                 *done = rxdone;
4544         EM_RX_UNLOCK(rxr);
4545
4546         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4547 }
4548
4549 static __inline void
4550 em_rx_discard(struct rx_ring *rxr, int i)
4551 {
4552         struct em_buffer        *rbuf;
4553
4554         rbuf = &rxr->rx_buffers[i];
4555         /* Free any previous pieces */
4556         if (rxr->fmp != NULL) {
4557                 rxr->fmp->m_flags |= M_PKTHDR;
4558                 m_freem(rxr->fmp);
4559                 rxr->fmp = NULL;
4560                 rxr->lmp = NULL;
4561         }
4562         /*
4563         ** Free buffer and allow em_refresh_mbufs()
4564         ** to clean up and recharge buffer.
4565         */
4566         if (rbuf->m_head) {
4567                 m_free(rbuf->m_head);
4568                 rbuf->m_head = NULL;
4569         }
4570         return;
4571 }
4572
4573 #ifndef __NO_STRICT_ALIGNMENT
4574 /*
4575  * When jumbo frames are enabled we should realign the entire payload on
4576  * architectures with strict alignment. This is a serious design mistake of
4577  * the 8254x as it nullifies the benefit of DMA. The 8254x only allows the RX
4578  * buffer size to be 2048/4096/8192/16384; what we really want is
4579  * 2048 - ETHER_ALIGN, which would align the payload. On architectures without
4580  * strict alignment the 8254x still performs unaligned accesses, which also
4581  * reduce performance. To avoid copying an entire frame just to realign it,
4582  * we allocate a new mbuf and copy only the ethernet header into it; the new
4583  * mbuf is prepended onto the existing mbuf chain.
4584  *
4585  * Be aware, the best performance of the 8254x is achieved only when jumbo
4586  * frames are not used at all on architectures with strict alignment.
4587  */
4588 static int
4589 em_fixup_rx(struct rx_ring *rxr)
4590 {
4591         struct adapter *adapter = rxr->adapter;
4592         struct mbuf *m, *n;
4593         int error;
4594
4595         error = 0;
4596         m = rxr->fmp;
4597         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4598                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4599                 m->m_data += ETHER_HDR_LEN;
4600         } else {
4601                 MGETHDR(n, M_NOWAIT, MT_DATA);
4602                 if (n != NULL) {
4603                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4604                         m->m_data += ETHER_HDR_LEN;
4605                         m->m_len -= ETHER_HDR_LEN;
4606                         n->m_len = ETHER_HDR_LEN;
4607                         M_MOVE_PKTHDR(n, m);
4608                         n->m_next = m;
4609                         rxr->fmp = n;
4610                 } else {
4611                         adapter->dropped_pkts++;
4612                         m_freem(rxr->fmp);
4613                         rxr->fmp = NULL;
4614                         error = ENOMEM;
4615                 }
4616         }
4617
4618         return (error);
4619 }
4620 #endif
4621
4622 /*********************************************************************
4623  *
4624  *  Verify that the hardware indicated that the checksum is valid.
4625  *  Inform the stack about the status of checksum so that stack
4626  *  doesn't spend time verifying the checksum.
4627  *
4628  *********************************************************************/
4629 static void
4630 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4631 {
4632         mp->m_pkthdr.csum_flags = 0;
4633
4634         /* Ignore Checksum bit is set */
4635         if (rx_desc->status & E1000_RXD_STAT_IXSM)
4636                 return;
4637
4638         if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4639                 return;
4640
4641         /* IP Checksum Good? */
4642         if (rx_desc->status & E1000_RXD_STAT_IPCS)
4643                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4644
4645         /* TCP or UDP checksum */
4646         if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4647                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4648                 mp->m_pkthdr.csum_data = htons(0xffff);
4649         }
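        /*
         * Note: CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data of
         * 0xffff tells the stack the L4 checksum, including the
         * pseudo-header, was verified in hardware, so no software
         * verification is performed.
         */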
4650 }
4651
4652 /*
4653  * This routine is run via a vlan
4654  * config EVENT
4655  */
4656 static void
4657 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4658 {
4659         struct adapter  *adapter = ifp->if_softc;
4660         u32             index, bit;
4661
4662         if (ifp->if_softc != arg)    /* Not our event */
4663                 return;
4664
4665         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4666                 return;
4667
4668         EM_CORE_LOCK(adapter);
4669         index = (vtag >> 5) & 0x7F;
4670         bit = vtag & 0x1F;
4671         adapter->shadow_vfta[index] |= (1 << bit);
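        /*
         * Worked example: vtag 100 gives index = 100 >> 5 = 3 and
         * bit = 100 & 0x1F = 4, so VLAN 100 is tracked by bit 4 of
         * shadow_vfta[3].
         */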
4672         ++adapter->num_vlans;
4673         /* Re-init to load the changes */
4674         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4675                 em_init_locked(adapter);
4676         EM_CORE_UNLOCK(adapter);
4677 }
4678
4679 /*
4680  * This routine is run via a vlan
4681  * unconfig EVENT
4682  */
4683 static void
4684 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4685 {
4686         struct adapter  *adapter = ifp->if_softc;
4687         u32             index, bit;
4688
4689         if (ifp->if_softc != arg)
4690                 return;
4691
4692         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4693                 return;
4694
4695         EM_CORE_LOCK(adapter);
4696         index = (vtag >> 5) & 0x7F;
4697         bit = vtag & 0x1F;
4698         adapter->shadow_vfta[index] &= ~(1 << bit);
4699         --adapter->num_vlans;
4700         /* Re-init to load the changes */
4701         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4702                 em_init_locked(adapter);
4703         EM_CORE_UNLOCK(adapter);
4704 }
4705
4706 static void
4707 em_setup_vlan_hw_support(struct adapter *adapter)
4708 {
4709         struct e1000_hw *hw = &adapter->hw;
4710         u32             reg;
4711
4712         /*
4713         ** We get here through init_locked, meaning
4714         ** a soft reset; this has already cleared
4715         ** the VFTA and other state, so if no
4716         ** vlans have been registered, do nothing.
4717         */
4718         if (adapter->num_vlans == 0)
4719                 return;
4720
4721         /*
4722         ** A soft reset zeroes out the VFTA, so
4723         ** we need to repopulate it now.
4724         */
4725         for (int i = 0; i < EM_VFTA_SIZE; i++)
4726                 if (adapter->shadow_vfta[i] != 0)
4727                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4728                             i, adapter->shadow_vfta[i]);
4729
4730         reg = E1000_READ_REG(hw, E1000_CTRL);
4731         reg |= E1000_CTRL_VME;
4732         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4733
4734         /* Enable the Filter Table */
4735         reg = E1000_READ_REG(hw, E1000_RCTL);
4736         reg &= ~E1000_RCTL_CFIEN;
4737         reg |= E1000_RCTL_VFE;
4738         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4739 }
4740
4741 static void
4742 em_enable_intr(struct adapter *adapter)
4743 {
4744         struct e1000_hw *hw = &adapter->hw;
4745         u32 ims_mask = IMS_ENABLE_MASK;
4746
4747         if (hw->mac.type == e1000_82574) {
4748                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4749                 ims_mask |= EM_MSIX_MASK;
4750         } 
4751         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4752 }
4753
4754 static void
4755 em_disable_intr(struct adapter *adapter)
4756 {
4757         struct e1000_hw *hw = &adapter->hw;
4758
4759         if (hw->mac.type == e1000_82574)
4760                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4761         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4762 }
4763
4764 /*
4765  * Bit of a misnomer: what this really means is
4766  * to enable OS management of the system, i.e.
4767  * to disable the special hardware management features.
4768  */
4769 static void
4770 em_init_manageability(struct adapter *adapter)
4771 {
4772         /* A shared code workaround */
4773 #define E1000_82542_MANC2H E1000_MANC2H
4774         if (adapter->has_manage) {
4775                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4776                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4777
4778                 /* disable hardware interception of ARP */
4779                 manc &= ~(E1000_MANC_ARP_EN);
4780
4781                 /* enable receiving management packets to the host */
4782                 manc |= E1000_MANC_EN_MNG2HOST;
4783 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4784 #define E1000_MNG2HOST_PORT_664 (1 << 6)
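                /*
                 * UDP ports 623 and 664 are the standard RMCP and
                 * secure RMCP (ASF) management ports; setting these
                 * bits forwards such traffic to the host.
                 */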
4785                 manc2h |= E1000_MNG2HOST_PORT_623;
4786                 manc2h |= E1000_MNG2HOST_PORT_664;
4787                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4788                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4789         }
4790 }
4791
4792 /*
4793  * Give control back to hardware management
4794  * controller if there is one.
4795  */
4796 static void
4797 em_release_manageability(struct adapter *adapter)
4798 {
4799         if (adapter->has_manage) {
4800                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4801
4802                 /* re-enable hardware interception of ARP */
4803                 manc |= E1000_MANC_ARP_EN;
4804                 manc &= ~E1000_MANC_EN_MNG2HOST;
4805
4806                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4807         }
4808 }
4809
4810 /*
4811  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4812  * For ASF and Pass Through versions of f/w this means
4813  * that the driver is loaded. For AMT versions of the f/w
4814  * this means that the network i/f is open.
4815  */
4816 static void
4817 em_get_hw_control(struct adapter *adapter)
4818 {
4819         u32 ctrl_ext, swsm;
4820
4821         if (adapter->hw.mac.type == e1000_82573) {
4822                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4823                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4824                     swsm | E1000_SWSM_DRV_LOAD);
4825                 return;
4826         }
4827         /* else */
4828         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4829         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4830             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4831         return;
4832 }
4833
4834 /*
4835  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4836  * For ASF and Pass Through versions of f/w this means that
4837  * the driver is no longer loaded. For AMT versions of the
4838  * f/w this means that the network i/f is closed.
4839  */
4840 static void
4841 em_release_hw_control(struct adapter *adapter)
4842 {
4843         u32 ctrl_ext, swsm;
4844
4845         if (!adapter->has_manage)
4846                 return;
4847
4848         if (adapter->hw.mac.type == e1000_82573) {
4849                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4850                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4851                     swsm & ~E1000_SWSM_DRV_LOAD);
4852                 return;
4853         }
4854         /* else */
4855         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4856         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4857             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4858         return;
4859 }
4860
4861 static int
4862 em_is_valid_ether_addr(u8 *addr)
4863 {
4864         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4865
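        /* Reject multicast (I/G bit in addr[0] set) and all-zero addresses */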
4866         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4867                 return (FALSE);
4868         }
4869
4870         return (TRUE);
4871 }
4872
4873 /*
4874 ** Parse the interface capabilities with regard
4875 ** to both system management and wake-on-lan for
4876 ** later use.
4877 */
4878 static void
4879 em_get_wakeup(device_t dev)
4880 {
4881         struct adapter  *adapter = device_get_softc(dev);
4882         u16             eeprom_data = 0, device_id, apme_mask;
4883
4884         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4885         apme_mask = EM_EEPROM_APME;
4886
4887         switch (adapter->hw.mac.type) {
4888         case e1000_82573:
4889         case e1000_82583:
4890                 adapter->has_amt = TRUE;
4891                 /* Falls thru */
4892         case e1000_82571:
4893         case e1000_82572:
4894         case e1000_80003es2lan:
4895                 if (adapter->hw.bus.func == 1) {
4896                         e1000_read_nvm(&adapter->hw,
4897                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4898                         break;
4899                 } else
4900                         e1000_read_nvm(&adapter->hw,
4901                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4902                 break;
4903         case e1000_ich8lan:
4904         case e1000_ich9lan:
4905         case e1000_ich10lan:
4906         case e1000_pchlan:
4907         case e1000_pch2lan:
4908                 apme_mask = E1000_WUC_APME;
4909                 adapter->has_amt = TRUE;
4910                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4911                 break;
4912         default:
4913                 e1000_read_nvm(&adapter->hw,
4914                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4915                 break;
4916         }
4917         if (eeprom_data & apme_mask)
4918                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4919         /*
4920          * We have the eeprom settings, now apply the special cases
4921          * where the eeprom may be wrong or the board won't support
4922          * wake on lan on a particular port
4923          */
4924         device_id = pci_get_device(dev);
4925         switch (device_id) {
4926         case E1000_DEV_ID_82571EB_FIBER:
4927                 /* Wake events only supported on port A for dual fiber
4928                  * regardless of eeprom setting */
4929                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4930                     E1000_STATUS_FUNC_1)
4931                         adapter->wol = 0;
4932                 break;
4933         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4934         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4935         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4936                 /* if quad port adapter, disable WoL on all but port A */
4937                 if (global_quad_port_a != 0)
4938                         adapter->wol = 0;
4939                 /* Reset for multiple quad port adapters */
4940                 if (++global_quad_port_a == 4)
4941                         global_quad_port_a = 0;
4942                 break;
4943         }
4944         return;
4945 }
4946
4947
4948 /*
4949  * Enable PCI Wake On Lan capability
4950  */
4951 static void
4952 em_enable_wakeup(device_t dev)
4953 {
4954         struct adapter  *adapter = device_get_softc(dev);
4955         struct ifnet    *ifp = adapter->ifp;
4956         u32             pmc, ctrl, ctrl_ext, rctl;
4957         u16             status;
4958
4959         if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4960                 return;
4961
4962         /* Advertise the wakeup capability */
4963         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4964         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4965         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4966         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4967
4968         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4969             (adapter->hw.mac.type == e1000_pchlan) ||
4970             (adapter->hw.mac.type == e1000_ich9lan) ||
4971             (adapter->hw.mac.type == e1000_ich10lan))
4972                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4973
4974         /* Keep the laser running on Fiber adapters */
4975         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4976             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4977                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4978                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4979                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4980         }
4981
4982         /*
4983         ** Determine type of Wakeup: note that wol
4984         ** is set with all bits on by default.
4985         */
4986         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4987                 adapter->wol &= ~E1000_WUFC_MAG;
4988
4989         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4990                 adapter->wol &= ~E1000_WUFC_MC;
4991         else {
4992                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4993                 rctl |= E1000_RCTL_MPE;
4994                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4995         }
4996
4997         if ((adapter->hw.mac.type == e1000_pchlan) ||
4998             (adapter->hw.mac.type == e1000_pch2lan)) {
4999                 if (em_enable_phy_wakeup(adapter))
5000                         return;
5001         } else {
5002                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5003                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5004         }
5005
5006         if (adapter->hw.phy.type == e1000_phy_igp_3)
5007                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5008
5009         /* Request PME */
5010         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5011         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5012         if (ifp->if_capenable & IFCAP_WOL)
5013                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5014         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5015
5016         return;
5017 }
5018
5019 /*
5020 ** WOL on the newer chipset interfaces (pchlan)
5021 ** requires settings to be copied into the PHY
5022 */
5023 static int
5024 em_enable_phy_wakeup(struct adapter *adapter)
5025 {
5026         struct e1000_hw *hw = &adapter->hw;
5027         u32 mreg, ret = 0;
5028         u16 preg;
5029
5030         /* copy MAC RARs to PHY RARs */
5031         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5032
5033         /* copy MAC MTA to PHY MTA */
5034         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5035                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5036                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5037                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5038                     (u16)((mreg >> 16) & 0xFFFF));
5039         }
5040
5041         /* configure PHY Rx Control register */
5042         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5043         mreg = E1000_READ_REG(hw, E1000_RCTL);
5044         if (mreg & E1000_RCTL_UPE)
5045                 preg |= BM_RCTL_UPE;
5046         if (mreg & E1000_RCTL_MPE)
5047                 preg |= BM_RCTL_MPE;
5048         preg &= ~(BM_RCTL_MO_MASK);
5049         if (mreg & E1000_RCTL_MO_3)
5050                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5051                                 << BM_RCTL_MO_SHIFT);
5052         if (mreg & E1000_RCTL_BAM)
5053                 preg |= BM_RCTL_BAM;
5054         if (mreg & E1000_RCTL_PMCF)
5055                 preg |= BM_RCTL_PMCF;
5056         mreg = E1000_READ_REG(hw, E1000_CTRL);
5057         if (mreg & E1000_CTRL_RFCE)
5058                 preg |= BM_RCTL_RFCE;
5059         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5060
5061         /* enable PHY wakeup in MAC register */
5062         E1000_WRITE_REG(hw, E1000_WUC,
5063             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5064         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5065
5066         /* configure and enable PHY wakeup in PHY registers */
5067         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5068         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5069
5070         /* activate PHY wakeup */
5071         ret = hw->phy.ops.acquire(hw);
5072         if (ret) {
5073                 printf("Could not acquire PHY\n");
5074                 return (ret);
5075         }
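        /*
         * The wakeup-control bits live on PHY page 769
         * (BM_WUC_ENABLE_PAGE); select that page before touching
         * BM_WUC_ENABLE_REG.
         */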
5076         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5077                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5078         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5079         if (ret) {
5080                 printf("Could not read PHY page 769\n");
5081                 goto out;
5082         }
5083         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5084         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5085         if (ret)
5086                 printf("Could not set PHY Host Wakeup bit\n");
5087 out:
5088         hw->phy.ops.release(hw);
5089
5090         return (ret);
5091 }
5092
5093 static void
5094 em_led_func(void *arg, int onoff)
5095 {
5096         struct adapter  *adapter = arg;
5097  
5098         EM_CORE_LOCK(adapter);
5099         if (onoff) {
5100                 e1000_setup_led(&adapter->hw);
5101                 e1000_led_on(&adapter->hw);
5102         } else {
5103                 e1000_led_off(&adapter->hw);
5104                 e1000_cleanup_led(&adapter->hw);
5105         }
5106         EM_CORE_UNLOCK(adapter);
5107 }
5108
5109 /*
5110 ** Disable the L0S and L1 LINK states
5111 */
5112 static void
5113 em_disable_aspm(struct adapter *adapter)
5114 {
5115         int             base, reg;
5116         u16             link_cap, link_ctrl;
5117         device_t        dev = adapter->dev;
5118
5119         switch (adapter->hw.mac.type) {
5120                 case e1000_82573:
5121                 case e1000_82574:
5122                 case e1000_82583:
5123                         break;
5124                 default:
5125                         return;
5126         }
5127         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5128                 return;
5129         reg = base + PCIER_LINK_CAP;
5130         link_cap = pci_read_config(dev, reg, 2);
5131         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5132                 return;
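        /*
         * ASPM Control is bits 1:0 of the PCIe Link Control register:
         * 00 = disabled, 01 = L0s, 10 = L1, 11 = L0s and L1; clearing
         * the field keeps the link in the full-power L0 state.
         */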
5133         reg = base + PCIER_LINK_CTL;
5134         link_ctrl = pci_read_config(dev, reg, 2);
5135         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5136         pci_write_config(dev, reg, link_ctrl, 2);
5137         return;
5138 }
5139
5140 /**********************************************************************
5141  *
5142  *  Update the board statistics counters.
5143  *
5144  **********************************************************************/
5145 static void
5146 em_update_stats_counters(struct adapter *adapter)
5147 {
5148         struct ifnet   *ifp;
5149
5150         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5151            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5152                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5153                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5154         }
5155         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5156         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5157         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5158         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5159
5160         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5161         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5162         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5163         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5164         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5165         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5166         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5167         /*
5168         ** For watchdog management we need to know if we have been
5169         ** paused during the last interval, so capture that here.
5170         */
5171         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5172         adapter->stats.xoffrxc += adapter->pause_frames;
5173         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5174         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5175         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5176         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5177         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5178         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5179         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5180         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5181         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5182         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5183         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5184         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5185
5186         /* For the 64-bit byte counters the low dword must be read first. */
5187         /* Both registers clear on the read of the high dword */
5188
5189         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5190             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5191         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5192             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5193
5194         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5195         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5196         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5197         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5198         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5199
5200         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5201         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5202
5203         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5204         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5205         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5206         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5207         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5208         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5209         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5210         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5211         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5212         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5213
5214         /* Interrupt Counts */
5215
5216         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5217         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5218         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5219         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5220         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5221         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5222         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5223         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5224         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5225
5226         if (adapter->hw.mac.type >= e1000_82543) {
5227                 adapter->stats.algnerrc += 
5228                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5229                 adapter->stats.rxerrc += 
5230                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5231                 adapter->stats.tncrs += 
5232                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5233                 adapter->stats.cexterr += 
5234                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5235                 adapter->stats.tsctc += 
5236                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5237                 adapter->stats.tsctfc += 
5238                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5239         }
5240         ifp = adapter->ifp;
5241
5242         ifp->if_collisions = adapter->stats.colc;
5243
5244         /* Rx Errors */
5245         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5246             adapter->stats.crcerrs + adapter->stats.algnerrc +
5247             adapter->stats.ruc + adapter->stats.roc +
5248             adapter->stats.mpc + adapter->stats.cexterr;
5249
5250         /* Tx Errors */
5251         ifp->if_oerrors = adapter->stats.ecol +
5252             adapter->stats.latecol + adapter->watchdog_events;
5253 }
5254
5255 /* Export a single 32-bit register via a read-only sysctl. */
5256 static int
5257 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5258 {
5259         struct adapter *adapter;
5260         u_int val;
5261
5262         adapter = oidp->oid_arg1;
5263         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5264         return (sysctl_handle_int(oidp, &val, 0, req));
5265 }
5266
5267 /*
5268  * Add sysctl variables, one per statistic, to the system.
5269  */
5270 static void
5271 em_add_hw_stats(struct adapter *adapter)
5272 {
5273         device_t dev = adapter->dev;
5274
5275         struct tx_ring *txr = adapter->tx_rings;
5276         struct rx_ring *rxr = adapter->rx_rings;
5277
5278         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5279         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5280         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5281         struct e1000_hw_stats *stats = &adapter->stats;
5282
5283         struct sysctl_oid *stat_node, *queue_node, *int_node;
5284         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5285
5286 #define QUEUE_NAME_LEN 32
5287         char namebuf[QUEUE_NAME_LEN];
5288         
5289         /* Driver Statistics */
5290         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5291                         CTLFLAG_RD, &adapter->link_irq,
5292                         "Link MSIX IRQ Handled");
5293         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5294                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5295                          "Std mbuf failed");
5296         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5297                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5298                          "Std mbuf cluster failed");
5299         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5300                         CTLFLAG_RD, &adapter->dropped_pkts,
5301                         "Driver dropped packets");
5302         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5303                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5304                         "Driver tx dma failure in xmit");
5305         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5306                         CTLFLAG_RD, &adapter->rx_overruns,
5307                         "RX overruns");
5308         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5309                         CTLFLAG_RD, &adapter->watchdog_events,
5310                         "Watchdog timeouts");
5311         
5312         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5313                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5314                         em_sysctl_reg_handler, "IU",
5315                         "Device Control Register");
5316         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5317                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5318                         em_sysctl_reg_handler, "IU",
5319                         "Receiver Control Register");
5320         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5321                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5322                         "Flow Control High Watermark");
5323         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5324                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5325                         "Flow Control Low Watermark");
5326
5327         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5328                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5329                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5330                                             CTLFLAG_RD, NULL, "Queue Name");
5331                 queue_list = SYSCTL_CHILDREN(queue_node);
5332
5333                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5334                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5335                                 E1000_TDH(txr->me),
5336                                 em_sysctl_reg_handler, "IU",
5337                                 "Transmit Descriptor Head");
5338                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5339                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5340                                 E1000_TDT(txr->me),
5341                                 em_sysctl_reg_handler, "IU",
5342                                 "Transmit Descriptor Tail");
5343                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5344                                 CTLFLAG_RD, &txr->tx_irq,
5345                                 "Queue MSI-X Transmit Interrupts");
5346                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5347                                 CTLFLAG_RD, &txr->no_desc_avail,
5348                                 "Queue No Descriptor Available");
5349                 
5350                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5351                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5352                                 E1000_RDH(rxr->me),
5353                                 em_sysctl_reg_handler, "IU",
5354                                 "Receive Descriptor Head");
5355                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5356                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5357                                 E1000_RDT(rxr->me),
5358                                 em_sysctl_reg_handler, "IU",
5359                                 "Receive Descriptor Tail");
5360                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5361                                 CTLFLAG_RD, &rxr->rx_irq,
5362                                 "Queue MSI-X Receive Interrupts");
5363         }
5364
5365         /* MAC stats get their own sub node */
5366
5367         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5368                                     CTLFLAG_RD, NULL, "Statistics");
5369         stat_list = SYSCTL_CHILDREN(stat_node);
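        /*
         * These appear under the device sysctl tree, e.g. for unit 0:
         *      sysctl dev.em.0.mac_stats.crc_errs
         */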
5370
5371         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5372                         CTLFLAG_RD, &stats->ecol,
5373                         "Excessive collisions");
5374         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5375                         CTLFLAG_RD, &stats->scc,
5376                         "Single collisions");
5377         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5378                         CTLFLAG_RD, &stats->mcc,
5379                         "Multiple collisions");
5380         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5381                         CTLFLAG_RD, &stats->latecol,
5382                         "Late collisions");
5383         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5384                         CTLFLAG_RD, &stats->colc,
5385                         "Collision Count");
5386         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5387                         CTLFLAG_RD, &adapter->stats.symerrs,
5388                         "Symbol Errors");
5389         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5390                         CTLFLAG_RD, &adapter->stats.sec,
5391                         "Sequence Errors");
5392         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5393                         CTLFLAG_RD, &adapter->stats.dc,
5394                         "Defer Count");
5395         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5396                         CTLFLAG_RD, &adapter->stats.mpc,
5397                         "Missed Packets");
5398         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5399                         CTLFLAG_RD, &adapter->stats.rnbc,
5400                         "Receive No Buffers");
5401         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5402                         CTLFLAG_RD, &adapter->stats.ruc,
5403                         "Receive Undersize");
5404         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5405                         CTLFLAG_RD, &adapter->stats.rfc,
5406                         "Fragmented Packets Received");
5407         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5408                         CTLFLAG_RD, &adapter->stats.roc,
5409                         "Oversized Packets Received");
5410         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5411                         CTLFLAG_RD, &adapter->stats.rjc,
5412                         "Received Jabber");
5413         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5414                         CTLFLAG_RD, &adapter->stats.rxerrc,
5415                         "Receive Errors");
5416         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5417                         CTLFLAG_RD, &adapter->stats.crcerrs,
5418                         "CRC errors");
5419         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5420                         CTLFLAG_RD, &adapter->stats.algnerrc,
5421                         "Alignment Errors");
5422         /* On 82575 these are collision counts */
5423         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5424                         CTLFLAG_RD, &adapter->stats.cexterr,
5425                         "Collision/Carrier extension errors");
5426         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5427                         CTLFLAG_RD, &adapter->stats.xonrxc,
5428                         "XON Received");
5429         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5430                         CTLFLAG_RD, &adapter->stats.xontxc,
5431                         "XON Transmitted");
5432         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5433                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5434                         "XOFF Received");
5435         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5436                         CTLFLAG_RD, &adapter->stats.xofftxc,
5437                         "XOFF Transmitted");
5438
5439         /* Packet Reception Stats */
5440         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5441                         CTLFLAG_RD, &adapter->stats.tpr,
5442                         "Total Packets Received");
5443         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5444                         CTLFLAG_RD, &adapter->stats.gprc,
5445                         "Good Packets Received");
5446         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5447                         CTLFLAG_RD, &adapter->stats.bprc,
5448                         "Broadcast Packets Received");
5449         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5450                         CTLFLAG_RD, &adapter->stats.mprc,
5451                         "Multicast Packets Received");
5452         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5453                         CTLFLAG_RD, &adapter->stats.prc64,
5454                         "64 byte frames received");
5455         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5456                         CTLFLAG_RD, &adapter->stats.prc127,
5457                         "65-127 byte frames received");
5458         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5459                         CTLFLAG_RD, &adapter->stats.prc255,
5460                         "128-255 byte frames received");
5461         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5462                         CTLFLAG_RD, &adapter->stats.prc511,
5463                         "256-511 byte frames received");
5464         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5465                         CTLFLAG_RD, &adapter->stats.prc1023,
5466                         "512-1023 byte frames received");
5467         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5468                         CTLFLAG_RD, &adapter->stats.prc1522,
5469                         "1024-1522 byte frames received");
5470         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5471                         CTLFLAG_RD, &adapter->stats.gorc, 
5472                         "Good Octets Received"); 
5473
5474         /* Packet Transmission Stats */
5475         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5476                         CTLFLAG_RD, &adapter->stats.gotc, 
5477                         "Good Octets Transmitted"); 
5478         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5479                         CTLFLAG_RD, &adapter->stats.tpt,
5480                         "Total Packets Transmitted");
5481         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5482                         CTLFLAG_RD, &adapter->stats.gptc,
5483                         "Good Packets Transmitted");
5484         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5485                         CTLFLAG_RD, &adapter->stats.bptc,
5486                         "Broadcast Packets Transmitted");
5487         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5488                         CTLFLAG_RD, &adapter->stats.mptc,
5489                         "Multicast Packets Transmitted");
5490         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5491                         CTLFLAG_RD, &adapter->stats.ptc64,
5492                         "64 byte frames transmitted");
5493         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5494                         CTLFLAG_RD, &adapter->stats.ptc127,
5495                         "65-127 byte frames transmitted");
5496         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5497                         CTLFLAG_RD, &adapter->stats.ptc255,
5498                         "128-255 byte frames transmitted");
5499         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5500                         CTLFLAG_RD, &adapter->stats.ptc511,
5501                         "256-511 byte frames transmitted");
5502         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5503                         CTLFLAG_RD, &adapter->stats.ptc1023,
5504                         "512-1023 byte frames transmitted");
5505         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5506                         CTLFLAG_RD, &adapter->stats.ptc1522,
5507                         "1024-1522 byte frames transmitted");
5508         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5509                         CTLFLAG_RD, &adapter->stats.tsctc,
5510                         "TSO Contexts Transmitted");
5511         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5512                         CTLFLAG_RD, &adapter->stats.tsctfc,
5513                         "TSO Contexts Failed");
5514
5515
5516         /* Interrupt Stats */
5517
5518         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5519                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5520         int_list = SYSCTL_CHILDREN(int_node);
5521
5522         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5523                         CTLFLAG_RD, &adapter->stats.iac,
5524                         "Interrupt Assertion Count");
5525
5526         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5527                         CTLFLAG_RD, &adapter->stats.icrxptc,
5528                         "Interrupt Cause Rx Pkt Timer Expire Count");
5529
5530         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5531                         CTLFLAG_RD, &adapter->stats.icrxatc,
5532                         "Interrupt Cause Rx Abs Timer Expire Count");
5533
5534         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5535                         CTLFLAG_RD, &adapter->stats.ictxptc,
5536                         "Interrupt Cause Tx Pkt Timer Expire Count");
5537
5538         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5539                         CTLFLAG_RD, &adapter->stats.ictxatc,
5540                         "Interrupt Cause Tx Abs Timer Expire Count");
5541
5542         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5543                         CTLFLAG_RD, &adapter->stats.ictxqec,
5544                         "Interrupt Cause Tx Queue Empty Count");
5545
5546         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5547                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5548                         "Interrupt Cause Tx Queue Min Thresh Count");
5549
5550         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5551                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5552                         "Interrupt Cause Rx Desc Min Thresh Count");
5553
5554         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5555                         CTLFLAG_RD, &adapter->stats.icrxoc,
5556                         "Interrupt Cause Receiver Overrun Count");
5557 }
5558
5559 /**********************************************************************
5560  *
5561  *  This routine provides a way to dump out the adapter eeprom,
5562  *  often a useful debug/service tool. This only dumps the first
5563  *  32 words; everything that matters lives within that extent.
5564  *
5565  **********************************************************************/
5566 static int
5567 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5568 {
5569         struct adapter *adapter = (struct adapter *)arg1;
5570         int error;
5571         int result;
5572
5573         result = -1;
5574         error = sysctl_handle_int(oidp, &result, 0, req);
5575
5576         if (error || !req->newptr)
5577                 return (error);
5578
5579         /*
5580          * This value will cause a hex dump of the
5581          * first 32 16-bit words of the EEPROM to
5582          * the screen.
5583          */
5584         if (result == 1)
5585                 em_print_nvm_info(adapter);
5586
5587         return (error);
5588 }
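/*
 * A minimal userland sketch of poking this handler, assuming the sysctl
 * is attached as dev.em.<unit>.nvm (the attach call is outside this
 * excerpt).  Writing 1 triggers the hex dump:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *
 *	int v = 1;
 *	(void) sysctlbyname("dev.em.0.nvm", NULL, NULL, &v, sizeof(v));
 *
 * or, from the shell: sysctl dev.em.0.nvm=1
 */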
5589
5590 static void
5591 em_print_nvm_info(struct adapter *adapter)
5592 {
5593         u16     eeprom_data;
5594         int     i, j, row = 0;
5595
5596         /* It's a bit crude, but it gets the job done */
5597         printf("\nInterface EEPROM Dump:\n");
5598         printf("Offset\n0x0000  ");
5599         for (i = 0, j = 0; i < 32; i++, j++) {
5600                 if (j == 8) { /* Make the offset block */
5601                         j = 0; ++row;
5602                         printf("\n0x00%x0  ", row);
5603                 }
5604                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5605                 printf("%04x ", eeprom_data);
5606         }
5607         printf("\n");
5608 }
5609
5610 static int
5611 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5612 {
5613         struct em_int_delay_info *info;
5614         struct adapter *adapter;
5615         u32 regval;
5616         int error, usecs, ticks;
5617
5618         info = (struct em_int_delay_info *)arg1;
5619         usecs = info->value;
5620         error = sysctl_handle_int(oidp, &usecs, 0, req);
5621         if (error != 0 || req->newptr == NULL)
5622                 return (error);
5623         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5624                 return (EINVAL);
5625         info->value = usecs;
5626         ticks = EM_USECS_TO_TICKS(usecs);
5627         if (info->offset == E1000_ITR)  /* units are 256ns here */
5628                 ticks *= 4;
5629
5630         adapter = info->adapter;
5631         
5632         EM_CORE_LOCK(adapter);
5633         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5634         regval = (regval & ~0xffff) | (ticks & 0xffff);
5635         /* Handle a few special cases. */
5636         switch (info->offset) {
5637         case E1000_RDTR:
5638                 break;
5639         case E1000_TIDV:
5640                 if (ticks == 0) {
5641                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5642                         /* Don't write 0 into the TIDV register. */
5643                         regval++;
5644                 } else
5645                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5646                 break;
5647         }
5648         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5649         EM_CORE_UNLOCK(adapter);
5650         return (0);
5651 }
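/*
 * A worked example of the conversion above, assuming EM_USECS_TO_TICKS()
 * (defined in the driver header, outside this excerpt) encodes the
 * hardware's ~1.024 us timer granularity:
 *
 *	usecs = 128  ->  ticks = 128 * 1000 / 1024 = 125
 *
 * E1000_ITR instead counts in 256 ns units, four per 1.024 us tick,
 * which is why the handler multiplies ticks by 4 for that register.
 */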
5652
5653 static void
5654 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5655         const char *description, struct em_int_delay_info *info,
5656         int offset, int value)
5657 {
5658         info->adapter = adapter;
5659         info->offset = offset;
5660         info->value = value;
5661         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5662             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5663             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5664             info, 0, em_sysctl_int_delay, "I", description);
5665 }
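/*
 * A hypothetical attach-time use of this helper; the field and default
 * names here are illustrative, not taken from this excerpt:
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */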
5666
5667 static void
5668 em_set_sysctl_value(struct adapter *adapter, const char *name,
5669         const char *description, int *limit, int value)
5670 {
5671         *limit = value;
5672         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5673             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5674             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5675 }
5676
5677
5678 /*
5679 ** Set flow control using sysctl:
5680 ** Flow control values:
5681 **      0 - off
5682 **      1 - rx pause
5683 **      2 - tx pause
5684 **      3 - full
5685 */
5686 static int
5687 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5688 {       
5689         int             error;
5690         int             input;
5691         struct adapter  *adapter = (struct adapter *) arg1;
5692         input = adapter->fc;    /* seed with the current mode */
5693         error = sysctl_handle_int(oidp, &input, 0, req);
5694     
5695         if ((error) || (req->newptr == NULL))
5696                 return (error);
5697                 
5698         if (input == adapter->fc) /* no change? */
5699                 return (error);
5700
5701         switch (input) {
5702                 case e1000_fc_rx_pause:
5703                 case e1000_fc_tx_pause:
5704                 case e1000_fc_full:
5705                 case e1000_fc_none:
5706                         adapter->hw.fc.requested_mode = input;
5707                         adapter->fc = input;
5708                         break;
5709                 default:
5710                         /* Do nothing */
5711                         return (error);
5712         }
5713
5714         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5715         e1000_force_mac_fc(&adapter->hw);
5716         return (error);
5717 }
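/*
 * Usage sketch, assuming the handler is attached as dev.em.<unit>.fc
 * (the attach call is outside this excerpt).  Requesting full flow
 * control from the shell:
 *
 *	sysctl dev.em.0.fc=3
 *
 * Reading the node back reports the mode currently in effect.
 */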
5718
5719 /*
5720 ** Manage Energy Efficient Ethernet:
5721 ** Control values:
5722 **     0/1 - enabled/disabled
5723 */
5724 static int
5725 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5726 {
5727         struct adapter *adapter = (struct adapter *) arg1;
5728         int             error, value;
5729
5730         value = adapter->hw.dev_spec.ich8lan.eee_disable;
5731         error = sysctl_handle_int(oidp, &value, 0, req);
5732         if (error || req->newptr == NULL)
5733                 return (error);
5734         EM_CORE_LOCK(adapter);
5735         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5736         em_init_locked(adapter);
5737         EM_CORE_UNLOCK(adapter);
5738         return (0);
5739 }
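/*
 * Usage sketch, assuming the handler is attached under the device's
 * sysctl tree (node name not shown here; dev.em.<unit>.eee_control is
 * assumed).  A nonzero write disables EEE and reinitializes the port:
 *
 *	sysctl dev.em.0.eee_control=1
 */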
5740
5741 static int
5742 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5743 {
5744         struct adapter *adapter;
5745         int error;
5746         int result;
5747
5748         result = -1;
5749         error = sysctl_handle_int(oidp, &result, 0, req);
5750
5751         if (error || !req->newptr)
5752                 return (error);
5753
5754         if (result == 1) {
5755                 adapter = (struct adapter *)arg1;
5756                 em_print_debug_info(adapter);
5757         }
5758
5759         return (error);
5760 }
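/*
 * As with the nvm handler above, writing 1 to the matching sysctl node
 * (assumed to be dev.em.<unit>.debug) prints the state dumped by
 * em_print_debug_info() below:
 *
 *	sysctl dev.em.0.debug=1
 */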
5761
5762 /*
5763 ** This routine is meant to be fluid; add whatever is
5764 ** needed for debugging a problem.  -jfv
5765 */
5766 static void
5767 em_print_debug_info(struct adapter *adapter)
5768 {
5769         device_t dev = adapter->dev;
5770         struct tx_ring *txr = adapter->tx_rings;
5771         struct rx_ring *rxr = adapter->rx_rings;
5772
5773         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5774                 printf("Interface is RUNNING ");
5775         else
5776                 printf("Interface is NOT RUNNING ");
5777
5778         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5779                 printf("and INACTIVE\n");
5780         else
5781                 printf("and ACTIVE\n");
5782
5783         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5784             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5785             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5786         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5787             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5788             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5789         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5790         device_printf(dev, "TX descriptors avail = %d\n",
5791             txr->tx_avail);
5792         device_printf(dev, "Tx Descriptors avail failure = %lu\n",
5793             txr->no_desc_avail);
5794         device_printf(dev, "RX discarded packets = %lu\n",
5795             rxr->rx_discarded);
5796         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5797         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5798 }