/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        DEVMETHOD_END
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
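/*
 * The hardware interrupt-delay timers count in units of 1.024 usecs
 * (1024 ns), hence the 1024/1000 scaling above; the +500 and +512
 * terms round to the nearest unit instead of truncating.
 */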
#define M_TSO_LEN                       66

#define MAX_INTS_PER_SEC        8000
#define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
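/*
 * The ITR register counts in 256 ns increments, so the default above
 * works out to 1e9 / (8000 * 256) ~= 488 units, i.e. a throttle of
 * roughly MAX_INTS_PER_SEC interrupts per second.
 */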

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF; the value is copied to
 * hw->dev_spec.ich8lan.eee_disable at attach, so 1 disables EEE. */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan) ||
            (hw->mac.type == e1000_pch_lpt)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "itr",
            "interrupt delay limit in usecs/4",
            &adapter->tx_itr,
            E1000_REGISTER(hw, E1000_ITR),
            DEFAULT_ITR);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. It
         * must not exceed the hardware maximum, and must be a multiple
         * of EM_DBA_ALIGN.
         */
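        /*
         * For example, with 16-byte descriptor structures and the usual
         * EM_DBA_ALIGN of 128, the descriptor counts must be multiples
         * of 8.
         */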
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->hw.mac.max_frame_size =
            ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        ** Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the driver is busy it can queue the request rather
 *  than performing an immediate send. It is this queueing, rather
 *  than having multiple hardware tx queues, that gives this driver
 *  its advantage.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m != NULL) {
                err = drbr_enqueue(ifp, txr->br, m);
                if (err)
                        return (err);
        }

        /* Process the queue */
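        /*
         * drbr_peek() leaves the mbuf on the ring: on a successful
         * em_xmit() drbr_advance() consumes it, while on failure
         * drbr_putback() restores it for a later retry (or
         * drbr_advance() drops it if em_xmit() already freed the
         * chain and left next NULL).
         */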
        while ((next = drbr_peek(ifp, txr->br)) != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next == NULL)
                                drbr_advance(ifp, txr->br);
                        else
                                drbr_putback(ifp, txr->br, next);
                        break;
                }
                drbr_advance(ifp, txr->br);
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

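        /*
         * If descriptors are getting scarce, reclaim any the hardware
         * has completed; if fewer than EM_MAX_SCATTER (the most segments
         * a single packet may need) remain, tell the stack we are full.
         */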
        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_pch_lpt:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
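                /*
                 * The MTU excludes the Ethernet header and CRC, so the
                 * largest acceptable value is the hardware frame limit
                 * minus both.
                 */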
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->hw.mac.max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: "
                    "SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: "
                    "SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address; the user may have set a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset; we make a duplicate
         * in RAR[14] for that eventuality, which ensures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
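        /*
         * MCLBYTES is the standard 2K cluster, MJUMPAGESIZE is one page
         * (4K on most platforms), and MJUM9BYTES is the 9K jumbo cluster.
         */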
1345         if (adapter->hw.mac.max_frame_size <= 2048)
1346                 adapter->rx_mbuf_sz = MCLBYTES;
1347         else if (adapter->hw.mac.max_frame_size <= 4096)
1348                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1349         else
1350                 adapter->rx_mbuf_sz = MJUM9BYTES;
1351
1352         /* Prepare receive descriptors and buffers */
1353         if (em_setup_receive_structures(adapter)) {
1354                 device_printf(dev, "Could not setup receive structures\n");
1355                 em_stop(adapter);
1356                 return;
1357         }
1358         em_initialize_receive_unit(adapter);
1359
1360         /* Use real VLAN Filter support? */
1361         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1362                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1363                         /* Use real VLAN Filter support */
1364                         em_setup_vlan_hw_support(adapter);
1365                 else {
1366                         u32 ctrl;
1367                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1368                         ctrl |= E1000_CTRL_VME;
1369                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1370                 }
1371         }
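        /*
         * Note: the CTRL.VME path above enables VLAN tag stripping and
         * insertion without any address filtering; the IFCAP_VLAN_HWFILTER
         * path is expected to program the VLAN filter table as well in
         * em_setup_vlan_hw_support().
         */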
1372
1373         /* Don't lose promiscuous settings */
1374         em_set_promisc(adapter);
1375
1376         /* Set the interface as ACTIVE */
1377         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1378         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1379
1380         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1381         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1382
1383         /* MSI/X configuration for 82574 */
1384         if (adapter->hw.mac.type == e1000_82574) {
1385                 int tmp;
1386                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1387                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1388                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1389                 /* Set the IVAR - interrupt vector routing. */
1390                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1391         }
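        /*
         * The IVAR value written above is assembled in em_allocate_msix():
         * each 4-bit field holds (0x8 | vector), with RX0 in bits 0-3,
         * TX0 in bits 8-11 and link/other in bits 16-19; the 0x8 appears
         * to be the per-field valid bit.
         */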
1392
1393 #ifdef DEVICE_POLLING
1394         /*
1395          * Only enable interrupts if we are not polling; make sure
1396          * they are off otherwise.
1397          */
1398         if (ifp->if_capenable & IFCAP_POLLING)
1399                 em_disable_intr(adapter);
1400         else
1401 #endif /* DEVICE_POLLING */
1402                 em_enable_intr(adapter);
1403
1404         /* AMT based hardware can now take control from firmware */
1405         if (adapter->has_manage && adapter->has_amt)
1406                 em_get_hw_control(adapter);
1407 }
1408
1409 static void
1410 em_init(void *arg)
1411 {
1412         struct adapter *adapter = arg;
1413
1414         EM_CORE_LOCK(adapter);
1415         em_init_locked(adapter);
1416         EM_CORE_UNLOCK(adapter);
1417 }
1418
1419
1420 #ifdef DEVICE_POLLING
1421 /*********************************************************************
1422  *
1423  *  Legacy polling routine: note this only works with a single queue
1424  *
1425  *********************************************************************/
1426 static int
1427 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1428 {
1429         struct adapter *adapter = ifp->if_softc;
1430         struct tx_ring  *txr = adapter->tx_rings;
1431         struct rx_ring  *rxr = adapter->rx_rings;
1432         u32             reg_icr;
1433         int             rx_done;
1434
1435         EM_CORE_LOCK(adapter);
1436         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1437                 EM_CORE_UNLOCK(adapter);
1438                 return (0);
1439         }
1440
1441         if (cmd == POLL_AND_CHECK_STATUS) {
1442                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1443                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1444                         callout_stop(&adapter->timer);
1445                         adapter->hw.mac.get_link_status = 1;
1446                         em_update_link_status(adapter);
1447                         callout_reset(&adapter->timer, hz,
1448                             em_local_timer, adapter);
1449                 }
1450         }
1451         EM_CORE_UNLOCK(adapter);
1452
1453         em_rxeof(rxr, count, &rx_done);
1454
1455         EM_TX_LOCK(txr);
1456         em_txeof(txr);
1457 #ifdef EM_MULTIQUEUE
1458         if (!drbr_empty(ifp, txr->br))
1459                 em_mq_start_locked(ifp, txr, NULL);
1460 #else
1461         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1462                 em_start_locked(ifp, txr);
1463 #endif
1464         EM_TX_UNLOCK(txr);
1465
1466         return (rx_done);
1467 }
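/*
 * Note: under DEVICE_POLLING the kernel polling framework calls em_poll()
 * with a packet budget in "count"; the rx_done return value reports how
 * much of that budget em_rxeof() actually consumed.
 */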
1468 #endif /* DEVICE_POLLING */
1469
1470
1471 /*********************************************************************
1472  *
1473  *  Fast Legacy/MSI Combined Interrupt Service routine  
1474  *
1475  *********************************************************************/
1476 static int
1477 em_irq_fast(void *arg)
1478 {
1479         struct adapter  *adapter = arg;
1480         struct ifnet    *ifp;
1481         u32             reg_icr;
1482
1483         ifp = adapter->ifp;
1484
1485         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1486
1487         /* Hot eject?  */
1488         if (reg_icr == 0xffffffff)
1489                 return FILTER_STRAY;
1490
1491         /* Definitely not our interrupt.  */
1492         if (reg_icr == 0x0)
1493                 return FILTER_STRAY;
1494
1495         /*
1496          * Starting with the 82571 chip, bit 31 should be used to
1497          * determine whether the interrupt belongs to us.
1498          */
1499         if (adapter->hw.mac.type >= e1000_82571 &&
1500             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1501                 return FILTER_STRAY;
1502
1503         em_disable_intr(adapter);
1504         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1505
1506         /* Link status change */
1507         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1508                 adapter->hw.mac.get_link_status = 1;
1509                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1510         }
1511
1512         if (reg_icr & E1000_ICR_RXO)
1513                 adapter->rx_overruns++;
1514         return FILTER_HANDLED;
1515 }
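/*
 * em_irq_fast() runs as an interrupt filter, so it must not sleep or take
 * regular mutexes: it only claims or rejects the interrupt (FILTER_HANDLED
 * vs. FILTER_STRAY), masks further interrupts, and defers the real RX/TX
 * work to the que_task handler below.
 */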
1516
1517 /* Combined RX/TX handler, used by Legacy and MSI */
1518 static void
1519 em_handle_que(void *context, int pending)
1520 {
1521         struct adapter  *adapter = context;
1522         struct ifnet    *ifp = adapter->ifp;
1523         struct tx_ring  *txr = adapter->tx_rings;
1524         struct rx_ring  *rxr = adapter->rx_rings;
1525
1526
1527         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1528                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1529                 EM_TX_LOCK(txr);
1530                 em_txeof(txr);
1531 #ifdef EM_MULTIQUEUE
1532                 if (!drbr_empty(ifp, txr->br))
1533                         em_mq_start_locked(ifp, txr, NULL);
1534 #else
1535                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1536                         em_start_locked(ifp, txr);
1537 #endif
1538                 EM_TX_UNLOCK(txr);
1539                 if (more) {
1540                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1541                         return;
1542                 }
1543         }
1544
1545         em_enable_intr(adapter);
1546         return;
1547 }
1548
1549
1550 /*********************************************************************
1551  *
1552  *  MSIX Interrupt Service Routines
1553  *
1554  **********************************************************************/
1555 static void
1556 em_msix_tx(void *arg)
1557 {
1558         struct tx_ring *txr = arg;
1559         struct adapter *adapter = txr->adapter;
1560         struct ifnet    *ifp = adapter->ifp;
1561
1562         ++txr->tx_irq;
1563         EM_TX_LOCK(txr);
1564         em_txeof(txr);
1565 #ifdef EM_MULTIQUEUE
1566         if (!drbr_empty(ifp, txr->br))
1567                 em_mq_start_locked(ifp, txr, NULL);
1568 #else
1569         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1570                 em_start_locked(ifp, txr);
1571 #endif
1572         /* Reenable this interrupt */
1573         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1574         EM_TX_UNLOCK(txr);
1575         return;
1576 }
1577
1578 /*********************************************************************
1579  *
1580  *  MSIX RX Interrupt Service routine
1581  *
1582  **********************************************************************/
1583
1584 static void
1585 em_msix_rx(void *arg)
1586 {
1587         struct rx_ring  *rxr = arg;
1588         struct adapter  *adapter = rxr->adapter;
1589         bool            more;
1590
1591         ++rxr->rx_irq;
1592         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1593                 return;
1594         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1595         if (more)
1596                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1597         else
1598                 /* Reenable this interrupt */
1599                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1600         return;
1601 }
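/*
 * Note the rearm discipline in em_msix_rx() (and em_handle_rx() below):
 * the vector is only re-enabled through IMS once em_rxeof() reports no
 * more work; otherwise the task is requeued, keeping the interrupt
 * masked while software catches up.
 */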
1602
1603 /*********************************************************************
1604  *
1605  *  MSIX Link Fast Interrupt Service routine
1606  *
1607  **********************************************************************/
1608 static void
1609 em_msix_link(void *arg)
1610 {
1611         struct adapter  *adapter = arg;
1612         u32             reg_icr;
1613
1614         ++adapter->link_irq;
1615         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1616
1617         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1618                 adapter->hw.mac.get_link_status = 1;
1619                 em_handle_link(adapter, 0);
1620         } else
1621                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1622                     EM_MSIX_LINK | E1000_IMS_LSC);
1623         return;
1624 }
1625
1626 static void
1627 em_handle_rx(void *context, int pending)
1628 {
1629         struct rx_ring  *rxr = context;
1630         struct adapter  *adapter = rxr->adapter;
1631         bool            more;
1632
1633         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1634         if (more)
1635                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1636         else
1637                 /* Reenable this interrupt */
1638                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1639 }
1640
1641 static void
1642 em_handle_tx(void *context, int pending)
1643 {
1644         struct tx_ring  *txr = context;
1645         struct adapter  *adapter = txr->adapter;
1646         struct ifnet    *ifp = adapter->ifp;
1647
1648         EM_TX_LOCK(txr);
1649         em_txeof(txr);
1650 #ifdef EM_MULTIQUEUE
1651         if (!drbr_empty(ifp, txr->br))
1652                 em_mq_start_locked(ifp, txr, NULL);
1653 #else
1654         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1655                 em_start_locked(ifp, txr);
1656 #endif
1657         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1658         EM_TX_UNLOCK(txr);
1659 }
1660
1661 static void
1662 em_handle_link(void *context, int pending)
1663 {
1664         struct adapter  *adapter = context;
1665         struct tx_ring  *txr = adapter->tx_rings;
1666         struct ifnet *ifp = adapter->ifp;
1667
1668         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1669                 return;
1670
1671         EM_CORE_LOCK(adapter);
1672         callout_stop(&adapter->timer);
1673         em_update_link_status(adapter);
1674         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1675         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1676             EM_MSIX_LINK | E1000_IMS_LSC);
1677         if (adapter->link_active) {
1678                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1679                         EM_TX_LOCK(txr);
1680 #ifdef EM_MULTIQUEUE
1681                         if (!drbr_empty(ifp, txr->br))
1682                                 em_mq_start_locked(ifp, txr, NULL);
1683 #else
1684                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1685                                 em_start_locked(ifp, txr);
1686 #endif
1687                         EM_TX_UNLOCK(txr);
1688                 }
1689         }
1690         EM_CORE_UNLOCK(adapter);
1691 }
1692
1693
1694 /*********************************************************************
1695  *
1696  *  Media Ioctl callback
1697  *
1698  *  This routine is called whenever the user queries the status of
1699  *  the interface using ifconfig.
1700  *
1701  **********************************************************************/
1702 static void
1703 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1704 {
1705         struct adapter *adapter = ifp->if_softc;
1706         u_char fiber_type = IFM_1000_SX;
1707
1708         INIT_DEBUGOUT("em_media_status: begin");
1709
1710         EM_CORE_LOCK(adapter);
1711         em_update_link_status(adapter);
1712
1713         ifmr->ifm_status = IFM_AVALID;
1714         ifmr->ifm_active = IFM_ETHER;
1715
1716         if (!adapter->link_active) {
1717                 EM_CORE_UNLOCK(adapter);
1718                 return;
1719         }
1720
1721         ifmr->ifm_status |= IFM_ACTIVE;
1722
1723         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1724             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1725                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1726         } else {
1727                 switch (adapter->link_speed) {
1728                 case 10:
1729                         ifmr->ifm_active |= IFM_10_T;
1730                         break;
1731                 case 100:
1732                         ifmr->ifm_active |= IFM_100_TX;
1733                         break;
1734                 case 1000:
1735                         ifmr->ifm_active |= IFM_1000_T;
1736                         break;
1737                 }
1738                 if (adapter->link_duplex == FULL_DUPLEX)
1739                         ifmr->ifm_active |= IFM_FDX;
1740                 else
1741                         ifmr->ifm_active |= IFM_HDX;
1742         }
1743         EM_CORE_UNLOCK(adapter);
1744 }
1745
1746 /*********************************************************************
1747  *
1748  *  Media Ioctl callback
1749  *
1750  *  This routine is called when the user changes speed/duplex using
1751  *  media/mediaopt options with ifconfig.
1752  *
1753  **********************************************************************/
1754 static int
1755 em_media_change(struct ifnet *ifp)
1756 {
1757         struct adapter *adapter = ifp->if_softc;
1758         struct ifmedia  *ifm = &adapter->media;
1759
1760         INIT_DEBUGOUT("em_media_change: begin");
1761
1762         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1763                 return (EINVAL);
1764
1765         EM_CORE_LOCK(adapter);
1766         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1767         case IFM_AUTO:
1768                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1769                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1770                 break;
1771         case IFM_1000_LX:
1772         case IFM_1000_SX:
1773         case IFM_1000_T:
1774                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1775                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1776                 break;
1777         case IFM_100_TX:
1778                 adapter->hw.mac.autoneg = FALSE;
1779                 adapter->hw.phy.autoneg_advertised = 0;
1780                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1781                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1782                 else
1783                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1784                 break;
1785         case IFM_10_T:
1786                 adapter->hw.mac.autoneg = FALSE;
1787                 adapter->hw.phy.autoneg_advertised = 0;
1788                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1789                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1790                 else
1791                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1792                 break;
1793         default:
1794                 device_printf(adapter->dev, "Unsupported media type\n");
1795         }
1796
1797         em_init_locked(adapter);
1798         EM_CORE_UNLOCK(adapter);
1799
1800         return (0);
1801 }
1802
1803 /*********************************************************************
1804  *
1805  *  This routine maps the mbufs to tx descriptors.
1806  *
1807  *  return 0 on success, positive on failure
1808  **********************************************************************/
1809
1810 static int
1811 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1812 {
1813         struct adapter          *adapter = txr->adapter;
1814         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1815         bus_dmamap_t            map;
1816         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1817         struct e1000_tx_desc    *ctxd = NULL;
1818         struct mbuf             *m_head;
1819         struct ether_header     *eh;
1820         struct ip               *ip = NULL;
1821         struct tcphdr           *tp = NULL;
1822         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1823         int                     ip_off, poff;
1824         int                     nsegs, i, j, first, last = 0;
1825         int                     error, do_tso, tso_desc = 0, remap = 1;
1826
1827 retry:
1828         m_head = *m_headp;
1829         txd_upper = txd_lower = txd_used = txd_saved = 0;
1830         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1831         ip_off = poff = 0;
1832
1833         /*
1834          * Intel recommends entire IP/TCP header length reside in a single
1835          * buffer. If multiple descriptors are used to describe the IP and
1836          * TCP header, each descriptor should describe one or more
1837          * complete headers; descriptors referencing only parts of headers
1838          * are not supported. If all layer headers are not coalesced into
1839          * a single buffer, each buffer should not cross a 4KB boundary,
1840          * or be larger than the maximum read request size.
1841          * Controller also requires modifing IP/TCP header to make TSO work
1842          * The controller also requires the IP/TCP header to be modified
1843          * for TSO to work, so we first get a writable mbuf chain and then
1844          * coalesce the ethernet/IP/TCP headers into a single buffer to
1845          * meet the controller's requirement. This also simplifies
1846          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1847         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1848                 if (do_tso || (m_head->m_next != NULL && 
1849                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1850                         if (M_WRITABLE(*m_headp) == 0) {
1851                                 m_head = m_dup(*m_headp, M_NOWAIT);
1852                                 m_freem(*m_headp);
1853                                 if (m_head == NULL) {
1854                                         *m_headp = NULL;
1855                                         return (ENOBUFS);
1856                                 }
1857                                 *m_headp = m_head;
1858                         }
1859                 }
1860                 /*
1861                  * XXX
1862                  * Assume IPv4, we don't have TSO/checksum offload support
1863                  * for IPv6 yet.
1864                  */
1865                 ip_off = sizeof(struct ether_header);
1866                 m_head = m_pullup(m_head, ip_off);
1867                 if (m_head == NULL) {
1868                         *m_headp = NULL;
1869                         return (ENOBUFS);
1870                 }
1871                 eh = mtod(m_head, struct ether_header *);
1872                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1873                         ip_off = sizeof(struct ether_vlan_header);
1874                         m_head = m_pullup(m_head, ip_off);
1875                         if (m_head == NULL) {
1876                                 *m_headp = NULL;
1877                                 return (ENOBUFS);
1878                         }
1879                 }
1880                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1881                 if (m_head == NULL) {
1882                         *m_headp = NULL;
1883                         return (ENOBUFS);
1884                 }
1885                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1886                 poff = ip_off + (ip->ip_hl << 2);
1887                 if (do_tso) {
1888                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1889                         if (m_head == NULL) {
1890                                 *m_headp = NULL;
1891                                 return (ENOBUFS);
1892                         }
1893                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1894                         /*
1895                          * TSO workaround:
1896                          *   pull 4 more bytes of payload into the contiguous header.
1897                          */
1898                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1899                         if (m_head == NULL) {
1900                                 *m_headp = NULL;
1901                                 return (ENOBUFS);
1902                         }
1903                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1904                         ip->ip_len = 0;
1905                         ip->ip_sum = 0;
1906                         /*
1907                          * The TCP pseudo checksum must not include the
1908                          * payload length, so the driver recomputes it here
1909                          * in the form the hardware expects to see, as
1910                          * required by Microsoft's Large Send specification.
1911                          */
1912                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1913                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1914                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
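                        /*
                         * in_pseudo() above returns the ones-complement
                         * sum of its three arguments, i.e. a pseudo-header
                         * checksum over source address, destination
                         * address and protocol, with the length left at
                         * zero for the hardware to add per segment.
                         */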
1915                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1916                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1917                         if (m_head == NULL) {
1918                                 *m_headp = NULL;
1919                                 return (ENOBUFS);
1920                         }
1921                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1922                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1923                         if (m_head == NULL) {
1924                                 *m_headp = NULL;
1925                                 return (ENOBUFS);
1926                         }
1927                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1928                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1929                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1930                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1931                         if (m_head == NULL) {
1932                                 *m_headp = NULL;
1933                                 return (ENOBUFS);
1934                         }
1935                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1936                 }
1937                 *m_headp = m_head;
1938         }
1939
1940         /*
1941          * Map the packet for DMA
1942          *
1943          * Capture the first descriptor index,
1944          * this descriptor will have the index
1945          * of the EOP which is the only one that
1946          * now gets a DONE bit writeback.
1947          */
1948         first = txr->next_avail_desc;
1949         tx_buffer = &txr->tx_buffers[first];
1950         tx_buffer_mapped = tx_buffer;
1951         map = tx_buffer->map;
1952
1953         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1954             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1955
1956         /*
1957          * There are two types of errors we can (try) to handle:
1958          * - EFBIG means the mbuf chain was too long and bus_dma ran
1959          *   out of segments.  Defragment the mbuf chain and try again.
1960          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1961          *   at this point in time.  Defer sending and try again later.
1962          * All other errors, in particular EINVAL, are fatal and prevent the
1963          * mbuf chain from ever going through.  Drop it and report error.
1964          */
1965         if (error == EFBIG && remap) {
1966                 struct mbuf *m;
1967
1968                 m = m_defrag(*m_headp, M_NOWAIT);
1969                 if (m == NULL) {
1970                         adapter->mbuf_alloc_failed++;
1971                         m_freem(*m_headp);
1972                         *m_headp = NULL;
1973                         return (ENOBUFS);
1974                 }
1975                 *m_headp = m;
1976
1977                 /* Try it again, but only once */
1978                 remap = 0;
1979                 goto retry;
1980         } else if (error == ENOMEM) {
1981                 adapter->no_tx_dma_setup++;
1982                 return (error);
1983         } else if (error != 0) {
1984                 adapter->no_tx_dma_setup++;
1985                 m_freem(*m_headp);
1986                 *m_headp = NULL;
1987                 return (error);
1988         }
1989
1990         /*
1991          * TSO Hardware workaround, if this packet is not
1992          * TSO, and is only a single descriptor long, and
1993          * it follows a TSO burst, then we need to add a
1994          * sentinel descriptor to prevent premature writeback.
1995          */
1996         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1997                 if (nsegs == 1)
1998                         tso_desc = TRUE;
1999                 txr->tx_tso = FALSE;
2000         }
2001
2002         if (nsegs > (txr->tx_avail - 2)) {
2003                 txr->no_desc_avail++;
2004                 bus_dmamap_unload(txr->txtag, map);
2005                 return (ENOBUFS);
2006         }
2007         m_head = *m_headp;
2008
2009         /* Do hardware assists */
2010         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2011                 em_tso_setup(txr, m_head, ip_off, ip, tp,
2012                     &txd_upper, &txd_lower);
2013                 /* we need to make a final sentinel transmit desc */
2014                 tso_desc = TRUE;
2015         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2016                 em_transmit_checksum_setup(txr, m_head,
2017                     ip_off, ip, &txd_upper, &txd_lower);
2018
2019         if (m_head->m_flags & M_VLANTAG) {
2020                 /* Set the vlan id. */
2021                 txd_upper |=
2022                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2023                 /* Tell hardware to add tag */
2024                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2025         }
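        /*
         * Example: an ether_vtag of 100 (0x0064) lands in the upper 16
         * bits of txd_upper (the legacy descriptor's "special" field),
         * and the VLE command bit makes the hardware insert the 802.1Q
         * tag on the wire.
         */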
2026
2027         i = txr->next_avail_desc;
2028
2029         /* Set up our transmit descriptors */
2030         for (j = 0; j < nsegs; j++) {
2031                 bus_size_t seg_len;
2032                 bus_addr_t seg_addr;
2033
2034                 tx_buffer = &txr->tx_buffers[i];
2035                 ctxd = &txr->tx_base[i];
2036                 seg_addr = segs[j].ds_addr;
2037                 seg_len  = segs[j].ds_len;
2038                 /*
2039                 ** TSO Workaround:
2040                 ** If this is the last descriptor, we want to
2041                 ** split it so we have a small final sentinel
2042                 */
2043                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2044                         seg_len -= 4;
2045                         ctxd->buffer_addr = htole64(seg_addr);
2046                         ctxd->lower.data = htole32(
2047                             adapter->txd_cmd | txd_lower | seg_len);
2048                         ctxd->upper.data =
2049                             htole32(txd_upper);
2050                         if (++i == adapter->num_tx_desc)
2051                                 i = 0;
2052                         /* Now make the sentinel */     
2053                         ++txd_used; /* using an extra txd */
2054                         ctxd = &txr->tx_base[i];
2055                         tx_buffer = &txr->tx_buffers[i];
2056                         ctxd->buffer_addr =
2057                             htole64(seg_addr + seg_len);
2058                         ctxd->lower.data = htole32(
2059                             adapter->txd_cmd | txd_lower | 4);
2060                         ctxd->upper.data =
2061                             htole32(txd_upper);
2062                         last = i;
2063                         if (++i == adapter->num_tx_desc)
2064                                 i = 0;
2065                 } else {
2066                         ctxd->buffer_addr = htole64(seg_addr);
2067                         ctxd->lower.data = htole32(
2068                             adapter->txd_cmd | txd_lower | seg_len);
2069                         ctxd->upper.data =
2070                             htole32(txd_upper);
2071                         last = i;
2072                         if (++i == adapter->num_tx_desc)
2073                                 i = 0;
2074                 }
2075                 tx_buffer->m_head = NULL;
2076                 tx_buffer->next_eop = -1;
2077         }
2078
2079         txr->next_avail_desc = i;
2080         txr->tx_avail -= nsegs;
2081         if (tso_desc) /* TSO used an extra for sentinel */
2082                 txr->tx_avail -= txd_used;
2083
2084         tx_buffer->m_head = m_head;
2085         /*
2086         ** Here we swap the maps so the last descriptor,
2087         ** which gets the completion interrupt, has the
2088         ** real map, and the first descriptor gets the
2089         ** unused map from this descriptor.
2090         */
2091         tx_buffer_mapped->map = tx_buffer->map;
2092         tx_buffer->map = map;
2093         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2094
2095         /*
2096          * Last Descriptor of Packet
2097          * needs End Of Packet (EOP)
2098          * and Report Status (RS)
2099          */
2100         ctxd->lower.data |=
2101             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2102         /*
2103          * Keep track in the first buffer which
2104          * descriptor will be written back
2105          */
2106         tx_buffer = &txr->tx_buffers[first];
2107         tx_buffer->next_eop = last;
2108         /* Update the watchdog time early and often */
2109         txr->watchdog_time = ticks;
2110
2111         /*
2112          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2113          * that this frame is available to transmit.
2114          */
2115         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2116             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2117         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2118
2119         return (0);
2120 }
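/*
 * Caller contract for em_xmit(): 0 means the frame was mapped and handed
 * to the hardware by the TDT write above. On ENOBUFS/ENOMEM the caller
 * can inspect *m_headp: if it is still non-NULL (descriptor shortage or
 * bounce-buffer exhaustion) the mbuf may be requeued and retried later;
 * if it is NULL the chain has already been freed and the frame dropped.
 */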
2121
2122 static void
2123 em_set_promisc(struct adapter *adapter)
2124 {
2125         struct ifnet    *ifp = adapter->ifp;
2126         u32             reg_rctl;
2127
2128         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2129
2130         if (ifp->if_flags & IFF_PROMISC) {
2131                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2132                 /* Turn this on if you want to see bad packets */
2133                 if (em_debug_sbp)
2134                         reg_rctl |= E1000_RCTL_SBP;
2135                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2136         } else if (ifp->if_flags & IFF_ALLMULTI) {
2137                 reg_rctl |= E1000_RCTL_MPE;
2138                 reg_rctl &= ~E1000_RCTL_UPE;
2139                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2140         }
2141 }
2142
2143 static void
2144 em_disable_promisc(struct adapter *adapter)
2145 {
2146         struct ifnet    *ifp = adapter->ifp;
2147         u32             reg_rctl;
2148         int             mcnt = 0;
2149
2150         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2151         reg_rctl &=  (~E1000_RCTL_UPE);
2152         if (ifp->if_flags & IFF_ALLMULTI)
2153                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2154         else {
2155                 struct  ifmultiaddr *ifma;
2156 #if __FreeBSD_version < 800000
2157                 IF_ADDR_LOCK(ifp);
2158 #else   
2159                 if_maddr_rlock(ifp);
2160 #endif
2161                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2162                         if (ifma->ifma_addr->sa_family != AF_LINK)
2163                                 continue;
2164                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2165                                 break;
2166                         mcnt++;
2167                 }
2168 #if __FreeBSD_version < 800000
2169                 IF_ADDR_UNLOCK(ifp);
2170 #else
2171                 if_maddr_runlock(ifp);
2172 #endif
2173         }
2174         /* Don't disable if in MAX groups */
2175         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2176                 reg_rctl &=  (~E1000_RCTL_MPE);
2177         reg_rctl &=  (~E1000_RCTL_SBP);
2178         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2179 }
2180
2181
2182 /*********************************************************************
2183  *  Multicast Update
2184  *
2185  *  This routine is called whenever multicast address list is updated.
2186  *
2187  **********************************************************************/
2188
2189 static void
2190 em_set_multi(struct adapter *adapter)
2191 {
2192         struct ifnet    *ifp = adapter->ifp;
2193         struct ifmultiaddr *ifma;
2194         u32 reg_rctl = 0;
2195         u8  *mta; /* Multicast array memory */
2196         int mcnt = 0;
2197
2198         IOCTL_DEBUGOUT("em_set_multi: begin");
2199
2200         mta = adapter->mta;
2201         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2202
2203         if (adapter->hw.mac.type == e1000_82542 && 
2204             adapter->hw.revision_id == E1000_REVISION_2) {
2205                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2206                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2207                         e1000_pci_clear_mwi(&adapter->hw);
2208                 reg_rctl |= E1000_RCTL_RST;
2209                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2210                 msec_delay(5);
2211         }
2212
2213 #if __FreeBSD_version < 800000
2214         IF_ADDR_LOCK(ifp);
2215 #else
2216         if_maddr_rlock(ifp);
2217 #endif
2218         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2219                 if (ifma->ifma_addr->sa_family != AF_LINK)
2220                         continue;
2221
2222                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2223                         break;
2224
2225                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2226                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2227                 mcnt++;
2228         }
2229 #if __FreeBSD_version < 800000
2230         IF_ADDR_UNLOCK(ifp);
2231 #else
2232         if_maddr_runlock(ifp);
2233 #endif
2234         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2235                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2236                 reg_rctl |= E1000_RCTL_MPE;
2237                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2238         } else
2239                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2240
2241         if (adapter->hw.mac.type == e1000_82542 && 
2242             adapter->hw.revision_id == E1000_REVISION_2) {
2243                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2244                 reg_rctl &= ~E1000_RCTL_RST;
2245                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2246                 msec_delay(5);
2247                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2248                         e1000_pci_set_mwi(&adapter->hw);
2249         }
2250 }
2251
2252
2253 /*********************************************************************
2254  *  Timer routine
2255  *
2256  *  This routine checks for link status and updates statistics.
2257  *
2258  **********************************************************************/
2259
2260 static void
2261 em_local_timer(void *arg)
2262 {
2263         struct adapter  *adapter = arg;
2264         struct ifnet    *ifp = adapter->ifp;
2265         struct tx_ring  *txr = adapter->tx_rings;
2266         struct rx_ring  *rxr = adapter->rx_rings;
2267         u32             trigger;
2268
2269         EM_CORE_LOCK_ASSERT(adapter);
2270
2271         em_update_link_status(adapter);
2272         em_update_stats_counters(adapter);
2273
2274         /* Reset LAA into RAR[0] on 82571 */
2275         if ((adapter->hw.mac.type == e1000_82571) &&
2276             e1000_get_laa_state_82571(&adapter->hw))
2277                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2278
2279         /* Mask to use in the irq trigger */
2280         if (adapter->msix_mem)
2281                 trigger = rxr->ims;
2282         else
2283                 trigger = E1000_ICS_RXDMT0;
2284
2285         /*
2286         ** Check on the state of the TX queue(s); this
2287         ** can be done without the lock because it is read-only
2288         ** and the HUNG state will be static if set.
2289         */
2290         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2291                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2292                     (adapter->pause_frames == 0))
2293                         goto hung;
2294                 /* Schedule a TX tasklet if needed */
2295                 if (txr->tx_avail <= EM_MAX_SCATTER)
2296                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2297         }
2298         
2299         adapter->pause_frames = 0;
2300         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2301 #ifndef DEVICE_POLLING
2302         /* Trigger an RX interrupt to guarantee mbuf refresh */
2303         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2304 #endif
2305         return;
2306 hung:
2307         /* Looks like we're hung */
2308         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2309         device_printf(adapter->dev,
2310             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2311             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2312             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2313         device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2314             "Next TX to Clean = %d\n",
2315             txr->me, txr->tx_avail, txr->next_to_clean);
2316         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2317         adapter->watchdog_events++;
2318         adapter->pause_frames = 0;
2319         em_init_locked(adapter);
2320 }
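/*
 * Watchdog design note: EM_QUEUE_HUNG is set by the TX cleanup path and
 * only read here, which is why the timer can inspect it without taking
 * the TX lock; the pause_frames test above avoids declaring a hang when
 * flow control from the link partner is legitimately stalling the queue.
 */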
2321
2322
2323 static void
2324 em_update_link_status(struct adapter *adapter)
2325 {
2326         struct e1000_hw *hw = &adapter->hw;
2327         struct ifnet *ifp = adapter->ifp;
2328         device_t dev = adapter->dev;
2329         struct tx_ring *txr = adapter->tx_rings;
2330         u32 link_check = 0;
2331
2332         /* Get the cached link value or read phy for real */
2333         switch (hw->phy.media_type) {
2334         case e1000_media_type_copper:
2335                 if (hw->mac.get_link_status) {
2336                         /* Do the work to read phy */
2337                         e1000_check_for_link(hw);
2338                         link_check = !hw->mac.get_link_status;
2339                         if (link_check) /* ESB2 fix */
2340                                 e1000_cfg_on_link_up(hw);
2341                 } else
2342                         link_check = TRUE;
2343                 break;
2344         case e1000_media_type_fiber:
2345                 e1000_check_for_link(hw);
2346                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2347                                  E1000_STATUS_LU);
2348                 break;
2349         case e1000_media_type_internal_serdes:
2350                 e1000_check_for_link(hw);
2351                 link_check = adapter->hw.mac.serdes_has_link;
2352                 break;
2353         default:
2354         case e1000_media_type_unknown:
2355                 break;
2356         }
2357
2358         /* Now check for a transition */
2359         if (link_check && (adapter->link_active == 0)) {
2360                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2361                     &adapter->link_duplex);
2362                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2363                 if ((adapter->link_speed != SPEED_1000) &&
2364                     ((hw->mac.type == e1000_82571) ||
2365                     (hw->mac.type == e1000_82572))) {
2366                         int tarc0;
2367                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2368                         tarc0 &= ~SPEED_MODE_BIT;
2369                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2370                 }
2371                 if (bootverbose)
2372                         device_printf(dev, "Link is up %d Mbps %s\n",
2373                             adapter->link_speed,
2374                             ((adapter->link_duplex == FULL_DUPLEX) ?
2375                             "Full Duplex" : "Half Duplex"));
2376                 adapter->link_active = 1;
2377                 adapter->smartspeed = 0;
2378                 ifp->if_baudrate = adapter->link_speed * 1000000;
2379                 if_link_state_change(ifp, LINK_STATE_UP);
2380         } else if (!link_check && (adapter->link_active == 1)) {
2381                 ifp->if_baudrate = adapter->link_speed = 0;
2382                 adapter->link_duplex = 0;
2383                 if (bootverbose)
2384                         device_printf(dev, "Link is Down\n");
2385                 adapter->link_active = 0;
2386                 /* Link down, disable watchdog */
2387                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2388                         txr->queue_status = EM_QUEUE_IDLE;
2389                 if_link_state_change(ifp, LINK_STATE_DOWN);
2390         }
2391 }
2392
2393 /*********************************************************************
2394  *
2395  *  This routine disables all traffic on the adapter by issuing a
2396  *  global reset on the MAC and deallocates TX/RX buffers.
2397  *
2398  *  This routine should always be called with BOTH the CORE
2399  *  and TX locks.
2400  **********************************************************************/
2401
2402 static void
2403 em_stop(void *arg)
2404 {
2405         struct adapter  *adapter = arg;
2406         struct ifnet    *ifp = adapter->ifp;
2407         struct tx_ring  *txr = adapter->tx_rings;
2408
2409         EM_CORE_LOCK_ASSERT(adapter);
2410
2411         INIT_DEBUGOUT("em_stop: begin");
2412
2413         em_disable_intr(adapter);
2414         callout_stop(&adapter->timer);
2415
2416         /* Tell the stack that the interface is no longer active */
2417         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2418         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2419
2420         /* Unarm watchdog timer. */
2421         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2422                 EM_TX_LOCK(txr);
2423                 txr->queue_status = EM_QUEUE_IDLE;
2424                 EM_TX_UNLOCK(txr);
2425         }
2426
2427         e1000_reset_hw(&adapter->hw);
2428         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2429
2430         e1000_led_off(&adapter->hw);
2431         e1000_cleanup_led(&adapter->hw);
2432 }
2433
2434
2435 /*********************************************************************
2436  *
2437  *  Determine hardware revision.
2438  *
2439  **********************************************************************/
2440 static void
2441 em_identify_hardware(struct adapter *adapter)
2442 {
2443         device_t dev = adapter->dev;
2444
2445         /* Make sure our PCI config space has the necessary stuff set */
2446         pci_enable_busmaster(dev);
2447         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2448
2449         /* Save off the information about this board */
2450         adapter->hw.vendor_id = pci_get_vendor(dev);
2451         adapter->hw.device_id = pci_get_device(dev);
2452         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2453         adapter->hw.subsystem_vendor_id =
2454             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2455         adapter->hw.subsystem_device_id =
2456             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2457
2458         /* Do Shared Code Init and Setup */
2459         if (e1000_set_mac_type(&adapter->hw)) {
2460                 device_printf(dev, "Setup init failure\n");
2461                 return;
2462         }
2463 }
2464
2465 static int
2466 em_allocate_pci_resources(struct adapter *adapter)
2467 {
2468         device_t        dev = adapter->dev;
2469         int             rid;
2470
2471         rid = PCIR_BAR(0);
2472         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2473             &rid, RF_ACTIVE);
2474         if (adapter->memory == NULL) {
2475                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2476                 return (ENXIO);
2477         }
2478         adapter->osdep.mem_bus_space_tag =
2479             rman_get_bustag(adapter->memory);
2480         adapter->osdep.mem_bus_space_handle =
2481             rman_get_bushandle(adapter->memory);
2482         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2483
2484         /* Default to a single queue */
2485         adapter->num_queues = 1;
2486
2487         /*
2488          * Setup MSI/X or MSI if PCI Express
2489          */
2490         adapter->msix = em_setup_msix(adapter);
2491
2492         adapter->hw.back = &adapter->osdep;
2493
2494         return (0);
2495 }
2496
2497 /*********************************************************************
2498  *
2499  *  Setup the Legacy or MSI Interrupt handler
2500  *
2501  **********************************************************************/
2502 int
2503 em_allocate_legacy(struct adapter *adapter)
2504 {
2505         device_t dev = adapter->dev;
2506         struct tx_ring  *txr = adapter->tx_rings;
2507         int error, rid = 0;
2508
2509         /* Manually turn off all interrupts */
2510         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2511
2512         if (adapter->msix == 1) /* using MSI */
2513                 rid = 1;
2514         /* We allocate a single interrupt resource */
2515         adapter->res = bus_alloc_resource_any(dev,
2516             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2517         if (adapter->res == NULL) {
2518                 device_printf(dev, "Unable to allocate bus resource: "
2519                     "interrupt\n");
2520                 return (ENXIO);
2521         }
2522
2523         /*
2524          * Allocate a fast interrupt and the associated
2525          * deferred processing contexts.
2526          */
2527         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2528         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2529             taskqueue_thread_enqueue, &adapter->tq);
2530         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2531             device_get_nameunit(adapter->dev));
2532         /* Use a TX only tasklet for local timer */
2533         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2534         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2535             taskqueue_thread_enqueue, &txr->tq);
2536         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2537             device_get_nameunit(adapter->dev));
2538         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2539         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2540             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2541                 device_printf(dev, "Failed to register fast interrupt "
2542                             "handler: %d\n", error);
2543                 taskqueue_free(adapter->tq);
2544                 adapter->tq = NULL;
2545                 return (error);
2546         }
2547         
2548         return (0);
2549 }
2550
2551 /*********************************************************************
2552  *
2553  *  Setup the MSIX Interrupt handlers
2554  *   This is not really Multiqueue, rather
2555  *   This is not really multiqueue; rather,
2556  *   it is just separate interrupt vectors
2557  *
2558  **********************************************************************/
2559 int
2560 em_allocate_msix(struct adapter *adapter)
2561 {
2562         device_t        dev = adapter->dev;
2563         struct          tx_ring *txr = adapter->tx_rings;
2564         struct          rx_ring *rxr = adapter->rx_rings;
2565         int             error, rid, vector = 0;
2566
2567
2568         /* Make sure all interrupts are disabled */
2569         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2570
2571         /* First set up ring resources */
2572         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2573
2574                 /* RX ring */
2575                 rid = vector + 1;
2576
2577                 rxr->res = bus_alloc_resource_any(dev,
2578                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2579                 if (rxr->res == NULL) {
2580                         device_printf(dev,
2581                             "Unable to allocate bus resource: "
2582                             "RX MSIX Interrupt %d\n", i);
2583                         return (ENXIO);
2584                 }
2585                 if ((error = bus_setup_intr(dev, rxr->res,
2586                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2587                     rxr, &rxr->tag)) != 0) {
2588                         device_printf(dev, "Failed to register RX handler");
2589                         return (error);
2590                 }
2591 #if __FreeBSD_version >= 800504
2592                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2593 #endif
2594                 rxr->msix = vector++; /* NOTE increment vector for TX */
2595                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2596                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2597                     taskqueue_thread_enqueue, &rxr->tq);
2598                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2599                     device_get_nameunit(adapter->dev));
2600                 /*
2601                 ** Set the bit to enable interrupt
2602                 ** in E1000_IMS -- bits 20 and 21
2603                 ** are for RX0 and RX1; note this has
2604                 ** NOTHING to do with the MSIX vector
2605                 */
2606                 rxr->ims = 1 << (20 + i);
2607                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2608
2609                 /* TX ring */
2610                 rid = vector + 1;
2611                 txr->res = bus_alloc_resource_any(dev,
2612                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2613                 if (txr->res == NULL) {
2614                         device_printf(dev,
2615                             "Unable to allocate bus resource: "
2616                             "TX MSIX Interrupt %d\n", i);
2617                         return (ENXIO);
2618                 }
2619                 if ((error = bus_setup_intr(dev, txr->res,
2620                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2621                     txr, &txr->tag)) != 0) {
2622                         device_printf(dev, "Failed to register TX handler");
2623                         return (error);
2624                 }
2625 #if __FreeBSD_version >= 800504
2626                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2627 #endif
2628                 txr->msix = vector++; /* Increment vector for next pass */
2629                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2630                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2631                     taskqueue_thread_enqueue, &txr->tq);
2632                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2633                     device_get_nameunit(adapter->dev));
2634                 /*
2635                 ** Set the bit to enable interrupt
2636                 ** in E1000_IMS -- bits 22 and 23
2637                 ** are for TX0 and TX1; note this has
2638                 ** NOTHING to do with the MSIX vector
2639                 */
2640                 txr->ims = 1 << (22 + i);
2641                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2642         }
2643
2644         /* Link interrupt */
2645         ++rid;
2646         adapter->res = bus_alloc_resource_any(dev,
2647             SYS_RES_IRQ, &rid, RF_ACTIVE);
2648         if (!adapter->res) {
2649                 device_printf(dev,"Unable to allocate "
2650                     "bus resource: Link interrupt [%d]\n", rid);
2651                 return (ENXIO);
2652         }
2653         /* Set the link handler function */
2654         error = bus_setup_intr(dev, adapter->res,
2655             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2656             em_msix_link, adapter, &adapter->tag);
2657         if (error) {
2658                 adapter->res = NULL;
2659                 device_printf(dev, "Failed to register LINK handler");
2660                 return (error);
2661         }
2662 #if __FreeBSD_version >= 800504
2663                 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2664 #endif
2665         adapter->linkvec = vector;
2666         adapter->ivars |=  (8 | vector) << 16;
2667         adapter->ivars |= 0x80000000;
2668
2669         return (0);
2670 }
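/*
 * With the single queue used on the 82574 this yields vector 0 for RX
 * (rid 1), vector 1 for TX (rid 2) and vector 2 for the link interrupt
 * (rid 3), matching the three vectors requested in em_setup_msix().
 */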
2671
2672
2673 static void
2674 em_free_pci_resources(struct adapter *adapter)
2675 {
2676         device_t        dev = adapter->dev;
2677         struct tx_ring  *txr;
2678         struct rx_ring  *rxr;
2679         int             rid;
2680
2681
2682         /*
2683         ** Release all the queue interrupt resources:
2684         */
2685         for (int i = 0; i < adapter->num_queues; i++) {
2686                 txr = &adapter->tx_rings[i];
2687                 rxr = &adapter->rx_rings[i];
2688                 /* an early abort? */
2689                 if ((txr == NULL) || (rxr == NULL))
2690                         break;
2691                 rid = txr->msix + 1;
2692                 if (txr->tag != NULL) {
2693                         bus_teardown_intr(dev, txr->res, txr->tag);
2694                         txr->tag = NULL;
2695                 }
2696                 if (txr->res != NULL)
2697                         bus_release_resource(dev, SYS_RES_IRQ,
2698                             rid, txr->res);
2699                 rid = rxr->msix + 1;
2700                 if (rxr->tag != NULL) {
2701                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2702                         rxr->tag = NULL;
2703                 }
2704                 if (rxr->res != NULL)
2705                         bus_release_resource(dev, SYS_RES_IRQ,
2706                             rid, rxr->res);
2707         }
2708
2709         if (adapter->linkvec) /* we are doing MSIX */
2710                 rid = adapter->linkvec + 1;
2711         else
2712                 rid = (adapter->msix != 0) ? 1 : 0;
2713
2714         if (adapter->tag != NULL) {
2715                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2716                 adapter->tag = NULL;
2717         }
2718
2719         if (adapter->res != NULL)
2720                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2721
2722
2723         if (adapter->msix)
2724                 pci_release_msi(dev);
2725
2726         if (adapter->msix_mem != NULL)
2727                 bus_release_resource(dev, SYS_RES_MEMORY,
2728                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2729
2730         if (adapter->memory != NULL)
2731                 bus_release_resource(dev, SYS_RES_MEMORY,
2732                     PCIR_BAR(0), adapter->memory);
2733
2734         if (adapter->flash != NULL)
2735                 bus_release_resource(dev, SYS_RES_MEMORY,
2736                     EM_FLASH, adapter->flash);
2737 }
2738
2739 /*
2740  * Setup MSI or MSI/X
2741  */
2742 static int
2743 em_setup_msix(struct adapter *adapter)
2744 {
2745         device_t dev = adapter->dev;
2746         int val;
2747
2748         /*
2749         ** Setup MSI/X for Hartwell: tests have shown
2750         ** use of two queues to be unstable, and to
2751         ** provide no great gain anyway, so we simply
2752         ** separate the interrupts and use a single queue.
2753         */
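        /* Note: em_enable_msix is a driver knob; by the driver's usual
         * convention it is exposed as the hw.em.enable_msix loader
         * tunable (assumption), so MSI-X use can also be disabled
         * administratively. */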
2754         if ((adapter->hw.mac.type == e1000_82574) &&
2755             (em_enable_msix == TRUE)) {
2756                 /* Map the MSIX BAR */
2757                 int rid = PCIR_BAR(EM_MSIX_BAR);
2758                 adapter->msix_mem = bus_alloc_resource_any(dev,
2759                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2760                 if (adapter->msix_mem == NULL) {
2761                         /* May not be enabled */
2762                         device_printf(adapter->dev,
2763                             "Unable to map MSIX table\n");
2764                         goto msi;
2765                 }
2766                 val = pci_msix_count(dev); 
2767                 /* We only need/want 3 vectors */
2768                 if (val >= 3)
2769                         val = 3;
2770                 else {
2771                         device_printf(adapter->dev,
2772                             "MSIX: insufficient vectors, using MSI\n");
2773                         goto msi;
2774                 }
2775
2776                 if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2777                         device_printf(adapter->dev,
2778                             "Using MSIX interrupts "
2779                             "with %d vectors\n", val);
2780                         return (val);
2781                 }
2782
2783                 /*
2784                 ** If MSIX alloc failed or provided us with
2785                 ** less than needed, free and fall through to MSI
2786                 */
2787                 pci_release_msi(dev);
2788         }
2789 msi:
2790         if (adapter->msix_mem != NULL) {
2791                 bus_release_resource(dev, SYS_RES_MEMORY,
2792                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2793                 adapter->msix_mem = NULL;
2794         }
2795         val = 1;
2796         if (pci_alloc_msi(dev, &val) == 0) {
2797                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2798                 return (val);
2799         }
2800         /* Should only happen due to manual configuration */
2801         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2802         return (0);
2803 }
2804
2805
2806 /*********************************************************************
2807  *
2808  *  Initialize the hardware to a configuration
2809  *  as specified by the adapter structure.
2810  *
2811  **********************************************************************/
2812 static void
2813 em_reset(struct adapter *adapter)
2814 {
2815         device_t        dev = adapter->dev;
2816         struct ifnet    *ifp = adapter->ifp;
2817         struct e1000_hw *hw = &adapter->hw;
2818         u16             rx_buffer_size;
2819         u32             pba;
2820
2821         INIT_DEBUGOUT("em_reset: begin");
2822
2823         /* Set up smart power down as default off on newer adapters. */
2824         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2825             hw->mac.type == e1000_82572)) {
2826                 u16 phy_tmp = 0;
2827
2828                 /* Speed up time to link by disabling smart power down. */
2829                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2830                 phy_tmp &= ~IGP02E1000_PM_SPD;
2831                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2832         }
2833
2834         /*
2835          * Packet Buffer Allocation (PBA)
2836          * Writing PBA sets the receive portion of the buffer;
2837          * the remainder is used for the transmit buffer.
2838          */
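        /* Example: E1000_PBA_32K below reserves 32K of the 82571/82572's
         * 48K packet buffer for receive, leaving 16K for transmit. */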
2839         switch (hw->mac.type) {
2840         /* Total Packet Buffer on these is 48K */
2841         case e1000_82571:
2842         case e1000_82572:
2843         case e1000_80003es2lan:
2844                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2845                 break;
2846         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2847                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2848                 break;
2849         case e1000_82574:
2850         case e1000_82583:
2851                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2852                 break;
2853         case e1000_ich8lan:
2854                 pba = E1000_PBA_8K;
2855                 break;
2856         case e1000_ich9lan:
2857         case e1000_ich10lan:
2858                 /* Boost Receive side for jumbo frames */
2859                 if (adapter->hw.mac.max_frame_size > 4096)
2860                         pba = E1000_PBA_14K;
2861                 else
2862                         pba = E1000_PBA_10K;
2863                 break;
2864         case e1000_pchlan:
2865         case e1000_pch2lan:
2866         case e1000_pch_lpt:
2867                 pba = E1000_PBA_26K;
2868                 break;
2869         default:
2870                 if (adapter->hw.mac.max_frame_size > 8192)
2871                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2872                 else
2873                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2874         }
2875         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2876
2877         /*
2878          * These parameters control the automatic generation (Tx) and
2879          * response (Rx) to Ethernet PAUSE frames.
2880          * - High water mark should allow for at least two frames to be
2881          *   received after sending an XOFF.
2882          * - Low water mark works best when it is very near the high water mark.
2883          *   This allows the receiver to restart by sending XON when it has
2884          *   drained a bit. Here we use an arbitrary value of 1500 which will
2885          *   restart after one full frame is pulled from the buffer. There
2886          *   could be several smaller frames in the buffer and if so they will
2887          *   not trigger the XON until their total number reduces the buffer
2888          *   by 1500.
2889          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2890          */
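        /*
         * Worked example, assuming a 48K receive PBA and a 1522-byte
         * max frame: rx_buffer_size = 48 * 1024 = 49152, high_water =
         * 49152 - roundup2(1522, 1024) = 49152 - 2048 = 47104, and
         * low_water = 47104 - 1500 = 45604.
         */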
2891         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2892         hw->fc.high_water = rx_buffer_size -
2893             roundup2(adapter->hw.mac.max_frame_size, 1024);
2894         hw->fc.low_water = hw->fc.high_water - 1500;
2895
2896         if (adapter->fc) /* locally set flow control value? */
2897                 hw->fc.requested_mode = adapter->fc;
2898         else
2899                 hw->fc.requested_mode = e1000_fc_full;
2900
2901         if (hw->mac.type == e1000_80003es2lan)
2902                 hw->fc.pause_time = 0xFFFF;
2903         else
2904                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2905
2906         hw->fc.send_xon = TRUE;
2907
2908         /* Device specific overrides/settings */
2909         switch (hw->mac.type) {
2910         case e1000_pchlan:
2911                 /* Workaround: no TX flow ctrl for PCH */
2912                 hw->fc.requested_mode = e1000_fc_rx_pause;
2913                 hw->fc.pause_time = 0xFFFF; /* override */
2914                 if (ifp->if_mtu > ETHERMTU) {
2915                         hw->fc.high_water = 0x3500;
2916                         hw->fc.low_water = 0x1500;
2917                 } else {
2918                         hw->fc.high_water = 0x5000;
2919                         hw->fc.low_water = 0x3000;
2920                 }
2921                 hw->fc.refresh_time = 0x1000;
2922                 break;
2923         case e1000_pch2lan:
2924         case e1000_pch_lpt:
2925                 hw->fc.high_water = 0x5C20;
2926                 hw->fc.low_water = 0x5048;
2927                 hw->fc.pause_time = 0x0650;
2928                 hw->fc.refresh_time = 0x0400;
2929                 /* Jumbos need adjusted PBA */
2930                 if (ifp->if_mtu > ETHERMTU)
2931                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2932                 else
2933                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2934                 break;
2935         case e1000_ich9lan:
2936         case e1000_ich10lan:
2937                 if (ifp->if_mtu > ETHERMTU) {
2938                         hw->fc.high_water = 0x2800;
2939                         hw->fc.low_water = hw->fc.high_water - 8;
2940                         break;
2941                 } 
2942                 /* else fall thru */
2943         default:
2944                 if (hw->mac.type == e1000_80003es2lan)
2945                         hw->fc.pause_time = 0xFFFF;
2946                 break;
2947         }
2948
2949         /* Issue a global reset */
2950         e1000_reset_hw(hw);
2951         E1000_WRITE_REG(hw, E1000_WUC, 0);
2952         em_disable_aspm(adapter);
2953         /* and a re-init */
2954         if (e1000_init_hw(hw) < 0) {
2955                 device_printf(dev, "Hardware Initialization Failed\n");
2956                 return;
2957         }
2958
2959         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2960         e1000_get_phy_info(hw);
2961         e1000_check_for_link(hw);
2962         return;
2963 }
2964
2965 /*********************************************************************
2966  *
2967  *  Setup networking device structure and register an interface.
2968  *
2969  **********************************************************************/
2970 static int
2971 em_setup_interface(device_t dev, struct adapter *adapter)
2972 {
2973         struct ifnet   *ifp;
2974
2975         INIT_DEBUGOUT("em_setup_interface: begin");
2976
2977         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2978         if (ifp == NULL) {
2979                 device_printf(dev, "can not allocate ifnet structure\n");
2980                 return (-1);
2981         }
2982         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2983         ifp->if_init =  em_init;
2984         ifp->if_softc = adapter;
2985         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2986         ifp->if_ioctl = em_ioctl;
2987 #ifdef EM_MULTIQUEUE
2988         /* Multiqueue stack interface */
2989         ifp->if_transmit = em_mq_start;
2990         ifp->if_qflush = em_qflush;
2991 #else
2992         ifp->if_start = em_start;
2993         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2994         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2995         IFQ_SET_READY(&ifp->if_snd);
2996 #endif  
2997
2998         ether_ifattach(ifp, adapter->hw.mac.addr);
2999
3000         ifp->if_capabilities = ifp->if_capenable = 0;
3001
3002
3003         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3004         ifp->if_capabilities |= IFCAP_TSO4;
3005         /*
3006          * Tell the upper layer(s) we
3007          * support full VLAN capability
3008          */
3009         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3010         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3011                              |  IFCAP_VLAN_HWTSO
3012                              |  IFCAP_VLAN_MTU;
3013         ifp->if_capenable = ifp->if_capabilities;
3014
3015         /*
3016         ** Don't turn this on by default: if vlans are
3017         ** created on another pseudo device (e.g. lagg)
3018         ** then vlan events are not passed through, breaking
3019         ** operation, but with HW FILTER off it works. If
3020         ** using vlans directly on the em driver you can
3021         ** enable this and get full hardware tag filtering.
3022         */
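        /* The capability can still be toggled administratively, e.g.
         * (assuming unit 0):  ifconfig em0 vlanhwfilter  */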
3023         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3024
3025 #ifdef DEVICE_POLLING
3026         ifp->if_capabilities |= IFCAP_POLLING;
3027 #endif
3028
3029         /* Enable only WOL MAGIC by default */
3030         if (adapter->wol) {
3031                 ifp->if_capabilities |= IFCAP_WOL;
3032                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3033         }
3034                 
3035         /*
3036          * Specify the media types supported by this adapter and register
3037          * callbacks to update media and link information
3038          */
3039         ifmedia_init(&adapter->media, IFM_IMASK,
3040             em_media_change, em_media_status);
3041         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3042             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3043                 u_char fiber_type = IFM_1000_SX;        /* default type */
3044
3045                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3046                             0, NULL);
3047                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3048         } else {
3049                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3050                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3051                             0, NULL);
3052                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3053                             0, NULL);
3054                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3055                             0, NULL);
3056                 if (adapter->hw.phy.type != e1000_phy_ife) {
3057                         ifmedia_add(&adapter->media,
3058                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3059                         ifmedia_add(&adapter->media,
3060                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3061                 }
3062         }
3063         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3064         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3065         return (0);
3066 }
3067
3068
3069 /*
3070  * Manage DMA'able memory.
3071  */
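/*
 * bus_dmamap_load() callback: on success, record the physical address of
 * the lone segment in the caller-supplied bus_addr_t (the tags created
 * below use nsegments = 1, so segs[0] covers the whole mapping).
 */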
3072 static void
3073 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3074 {
3075         if (error)
3076                 return;
3077         *(bus_addr_t *) arg = segs[0].ds_addr;
3078 }
3079
3080 static int
3081 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3082         struct em_dma_alloc *dma, int mapflags)
3083 {
3084         int error;
3085
3086         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3087                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3088                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3089                                 BUS_SPACE_MAXADDR,      /* highaddr */
3090                                 NULL, NULL,             /* filter, filterarg */
3091                                 size,                   /* maxsize */
3092                                 1,                      /* nsegments */
3093                                 size,                   /* maxsegsize */
3094                                 0,                      /* flags */
3095                                 NULL,                   /* lockfunc */
3096                                 NULL,                   /* lockarg */
3097                                 &dma->dma_tag);
3098         if (error) {
3099                 device_printf(adapter->dev,
3100                     "%s: bus_dma_tag_create failed: %d\n",
3101                     __func__, error);
3102                 goto fail_0;
3103         }
3104
3105         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3106             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3107         if (error) {
3108                 device_printf(adapter->dev,
3109                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3110                     __func__, (uintmax_t)size, error);
3111                 goto fail_2;
3112         }
3113
3114         dma->dma_paddr = 0;
3115         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3116             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3117         if (error || dma->dma_paddr == 0) {
3118                 device_printf(adapter->dev,
3119                     "%s: bus_dmamap_load failed: %d\n",
3120                     __func__, error);
3121                 goto fail_3;
3122         }
3123
3124         return (0);
3125
3126 fail_3:
3127         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3128 fail_2:
3129         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3130         bus_dma_tag_destroy(dma->dma_tag);
3131 fail_0:
3132         dma->dma_map = NULL;
3133         dma->dma_tag = NULL;
3134
3135         return (error);
3136 }
3137
3138 static void
3139 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3140 {
3141         if (dma->dma_tag == NULL)
3142                 return;
3143         if (dma->dma_map != NULL) {
3144                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3145                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3146                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3147                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3148                 dma->dma_map = NULL;
3149         }
3150         bus_dma_tag_destroy(dma->dma_tag);
3151         dma->dma_tag = NULL;
3152 }
3153
3154
3155 /*********************************************************************
3156  *
3157  *  Allocate memory for the transmit and receive rings, and then
3158  *  the descriptors associated with each, called only once at attach.
3159  *
3160  **********************************************************************/
3161 static int
3162 em_allocate_queues(struct adapter *adapter)
3163 {
3164         device_t                dev = adapter->dev;
3165         struct tx_ring          *txr = NULL;
3166         struct rx_ring          *rxr = NULL;
3167         int rsize, tsize, error = E1000_SUCCESS;
3168         int txconf = 0, rxconf = 0;
3169
3170
3171         /* Allocate the TX ring struct memory */
3172         if (!(adapter->tx_rings =
3173             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3174             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3175                 device_printf(dev, "Unable to allocate TX ring memory\n");
3176                 error = ENOMEM;
3177                 goto fail;
3178         }
3179
3180         /* Now allocate the RX */
3181         if (!(adapter->rx_rings =
3182             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3183             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3184                 device_printf(dev, "Unable to allocate RX ring memory\n");
3185                 error = ENOMEM;
3186                 goto rx_fail;
3187         }
3188
3189         tsize = roundup2(adapter->num_tx_desc *
3190             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3191         /*
3192          * Now set up the TX queues, txconf is needed to handle the
3193          * possibility that things fail midcourse and we need to
3194          * undo memory gracefully
3195          */ 
3196         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3197                 /* Set up some basics */
3198                 txr = &adapter->tx_rings[i];
3199                 txr->adapter = adapter;
3200                 txr->me = i;
3201
3202                 /* Initialize the TX lock */
3203                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3204                     device_get_nameunit(dev), txr->me);
3205                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3206
3207                 if (em_dma_malloc(adapter, tsize,
3208                         &txr->txdma, BUS_DMA_NOWAIT)) {
3209                         device_printf(dev,
3210                             "Unable to allocate TX Descriptor memory\n");
3211                         error = ENOMEM;
3212                         goto err_tx_desc;
3213                 }
3214                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3215                 bzero((void *)txr->tx_base, tsize);
3216
3217                 if (em_allocate_transmit_buffers(txr)) {
3218                         device_printf(dev,
3219                             "Critical Failure setting up transmit buffers\n");
3220                         error = ENOMEM;
3221                         goto err_tx_desc;
3222                 }
3223 #if __FreeBSD_version >= 800000
3224                 /* Allocate a buf ring */
3225                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3226                     M_WAITOK, &txr->tx_mtx);
3227 #endif
3228         }
3229
3230         /*
3231          * Next the RX queues...
3232          */ 
3233         rsize = roundup2(adapter->num_rx_desc *
3234             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3235         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3236                 rxr = &adapter->rx_rings[i];
3237                 rxr->adapter = adapter;
3238                 rxr->me = i;
3239
3240                 /* Initialize the RX lock */
3241                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3242                     device_get_nameunit(dev), rxr->me);
3243                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3244
3245                 if (em_dma_malloc(adapter, rsize,
3246                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3247                         device_printf(dev,
3248                             "Unable to allocate RX Descriptor memory\n");
3249                         error = ENOMEM;
3250                         goto err_rx_desc;
3251                 }
3252                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3253                 bzero((void *)rxr->rx_base, rsize);
3254
3255                 /* Allocate receive buffers for the ring */
3256                 if (em_allocate_receive_buffers(rxr)) {
3257                         device_printf(dev,
3258                             "Critical Failure setting up receive buffers\n");
3259                         error = ENOMEM;
3260                         goto err_rx_desc;
3261                 }
3262         }
3263
3264         return (0);
3265
3266 err_rx_desc:
3267         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3268                 em_dma_free(adapter, &rxr->rxdma);
3269 err_tx_desc:
3270         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3271                 em_dma_free(adapter, &txr->txdma);
3272         free(adapter->rx_rings, M_DEVBUF);
3273 rx_fail:
3274 #if __FreeBSD_version >= 800000
3275         /* txr is NULL if the RX ring malloc failed before TX setup */
3276         if (txr != NULL && txr->br != NULL)
3277                 buf_ring_free(txr->br, M_DEVBUF);
3278 #endif
3277         free(adapter->tx_rings, M_DEVBUF);
3278 fail:
3279         return (error);
3280 }
3281
3282
3283 /*********************************************************************
3284  *
3285  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3286  *  the information needed to transmit a packet on the wire. This is
3287  *  called only once at attach, setup is done every reset.
3288  *
3289  **********************************************************************/
3290 static int
3291 em_allocate_transmit_buffers(struct tx_ring *txr)
3292 {
3293         struct adapter *adapter = txr->adapter;
3294         device_t dev = adapter->dev;
3295         struct em_buffer *txbuf;
3296         int error, i;
3297
3298         /*
3299          * Setup DMA descriptor areas.
3300          */
3301         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3302                                1, 0,                    /* alignment, bounds */
3303                                BUS_SPACE_MAXADDR,       /* lowaddr */
3304                                BUS_SPACE_MAXADDR,       /* highaddr */
3305                                NULL, NULL,              /* filter, filterarg */
3306                                EM_TSO_SIZE,             /* maxsize */
3307                                EM_MAX_SCATTER,          /* nsegments */
3308                                PAGE_SIZE,               /* maxsegsize */
3309                                0,                       /* flags */
3310                                NULL,                    /* lockfunc */
3311                                NULL,                    /* lockfuncarg */
3312                                &txr->txtag))) {
3313                 device_printf(dev, "Unable to allocate TX DMA tag\n");
3314                 goto fail;
3315         }
3316
3317         if (!(txr->tx_buffers =
3318             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3319             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3321                 error = ENOMEM;
3322                 goto fail;
3323         }
3324
3325         /* Create the descriptor buffer dma maps */
3326         txbuf = txr->tx_buffers;
3327         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3328                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3329                 if (error != 0) {
3330                         device_printf(dev, "Unable to create TX DMA map\n");
3331                         goto fail;
3332                 }
3333         }
3334
3335         return (0);
3336 fail:
3337         /* We free everything; this handles a failure midway through */
3338         em_free_transmit_structures(adapter);
3339         return (error);
3340 }
3341
3342 /*********************************************************************
3343  *
3344  *  Initialize a transmit ring.
3345  *
3346  **********************************************************************/
3347 static void
3348 em_setup_transmit_ring(struct tx_ring *txr)
3349 {
3350         struct adapter *adapter = txr->adapter;
3351         struct em_buffer *txbuf;
3352         int i;
3353 #ifdef DEV_NETMAP
3354         struct netmap_adapter *na = NA(adapter->ifp);
3355         struct netmap_slot *slot;
3356 #endif /* DEV_NETMAP */
3357
3358         /* Clear the old descriptor contents */
3359         EM_TX_LOCK(txr);
3360 #ifdef DEV_NETMAP
3361         slot = netmap_reset(na, NR_TX, txr->me, 0);
3362 #endif /* DEV_NETMAP */
3363
3364         bzero((void *)txr->tx_base,
3365               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3366         /* Reset indices */
3367         txr->next_avail_desc = 0;
3368         txr->next_to_clean = 0;
3369
3370         /* Free any existing tx buffers. */
3371         txbuf = txr->tx_buffers;
3372         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3373                 if (txbuf->m_head != NULL) {
3374                         bus_dmamap_sync(txr->txtag, txbuf->map,
3375                             BUS_DMASYNC_POSTWRITE);
3376                         bus_dmamap_unload(txr->txtag, txbuf->map);
3377                         m_freem(txbuf->m_head);
3378                         txbuf->m_head = NULL;
3379                 }
3380 #ifdef DEV_NETMAP
3381                 if (slot) {
3382                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3383                         uint64_t paddr;
3384                         void *addr;
3385
3386                         addr = PNMB(slot + si, &paddr);
3387                         txr->tx_base[i].buffer_addr = htole64(paddr);
3388                         /* reload the map for netmap mode */
3389                         netmap_load_map(txr->txtag, txbuf->map, addr);
3390                 }
3391 #endif /* DEV_NETMAP */
3392
3393                 /* clear the watch index */
3394                 txbuf->next_eop = -1;
3395         }
3396
3397         /* Set number of descriptors available */
3398         txr->tx_avail = adapter->num_tx_desc;
3399         txr->queue_status = EM_QUEUE_IDLE;
3400
3401         /* Clear checksum offload context. */
3402         txr->last_hw_offload = 0;
3403         txr->last_hw_ipcss = 0;
3404         txr->last_hw_ipcso = 0;
3405         txr->last_hw_tucss = 0;
3406         txr->last_hw_tucso = 0;
3407
3408         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3409             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3410         EM_TX_UNLOCK(txr);
3411 }
3412
3413 /*********************************************************************
3414  *
3415  *  Initialize all transmit rings.
3416  *
3417  **********************************************************************/
3418 static void
3419 em_setup_transmit_structures(struct adapter *adapter)
3420 {
3421         struct tx_ring *txr = adapter->tx_rings;
3422
3423         for (int i = 0; i < adapter->num_queues; i++, txr++)
3424                 em_setup_transmit_ring(txr);
3425
3426         return;
3427 }
3428
3429 /*********************************************************************
3430  *
3431  *  Enable transmit unit.
3432  *
3433  **********************************************************************/
3434 static void
3435 em_initialize_transmit_unit(struct adapter *adapter)
3436 {
3437         struct tx_ring  *txr = adapter->tx_rings;
3438         struct e1000_hw *hw = &adapter->hw;
3439         u32     tctl, tarc, tipg = 0;
3440
3441         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3442
3443         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3444                 u64 bus_addr = txr->txdma.dma_paddr;
3445                 /* Base and Len of TX Ring */
3446                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3447                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3448                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3449                     (u32)(bus_addr >> 32));
3450                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3451                     (u32)bus_addr);
3452                 /* Init the HEAD/TAIL indices */
3453                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3454                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3455
3456                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3457                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3458                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3459
3460                 txr->queue_status = EM_QUEUE_IDLE;
3461         }
3462
3463         /* Set the default values for the Tx Inter Packet Gap timer */
3464         switch (adapter->hw.mac.type) {
3465         case e1000_80003es2lan:
3466                 tipg = DEFAULT_82543_TIPG_IPGR1;
3467                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3468                     E1000_TIPG_IPGR2_SHIFT;
3469                 break;
3470         default:
3471                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3472                     (adapter->hw.phy.media_type ==
3473                     e1000_media_type_internal_serdes))
3474                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3475                 else
3476                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3477                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3478                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3479         }
3480
3481         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3482         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3483
3484         if (adapter->hw.mac.type >= e1000_82540)
3485                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3486                     adapter->tx_abs_int_delay.value);
3487
3488         if ((adapter->hw.mac.type == e1000_82571) ||
3489             (adapter->hw.mac.type == e1000_82572)) {
3490                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3491                 tarc |= SPEED_MODE_BIT;
3492                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3493         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3494                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3495                 tarc |= 1;
3496                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3497                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3498                 tarc |= 1;
3499                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3500         }
3501
3502         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3503         if (adapter->tx_int_delay.value > 0)
3504                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3505
3506         /* Program the Transmit Control Register */
3507         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3508         tctl &= ~E1000_TCTL_CT;
3509         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3510                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3511
3512         if (adapter->hw.mac.type >= e1000_82571)
3513                 tctl |= E1000_TCTL_MULR;
3514
3515         /* This write will effectively turn on the transmit unit. */
3516         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3517
3518 }
3519
3520
3521 /*********************************************************************
3522  *
3523  *  Free all transmit rings.
3524  *
3525  **********************************************************************/
3526 static void
3527 em_free_transmit_structures(struct adapter *adapter)
3528 {
3529         struct tx_ring *txr = adapter->tx_rings;
3530
3531         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3532                 EM_TX_LOCK(txr);
3533                 em_free_transmit_buffers(txr);
3534                 em_dma_free(adapter, &txr->txdma);
3535                 EM_TX_UNLOCK(txr);
3536                 EM_TX_LOCK_DESTROY(txr);
3537         }
3538
3539         free(adapter->tx_rings, M_DEVBUF);
3540 }
3541
3542 /*********************************************************************
3543  *
3544  *  Free transmit ring related data structures.
3545  *
3546  **********************************************************************/
3547 static void
3548 em_free_transmit_buffers(struct tx_ring *txr)
3549 {
3550         struct adapter          *adapter = txr->adapter;
3551         struct em_buffer        *txbuf;
3552
3553         INIT_DEBUGOUT("free_transmit_ring: begin");
3554
3555         if (txr->tx_buffers == NULL)
3556                 return;
3557
3558         for (int i = 0; i < adapter->num_tx_desc; i++) {
3559                 txbuf = &txr->tx_buffers[i];
3560                 if (txbuf->m_head != NULL) {
3561                         bus_dmamap_sync(txr->txtag, txbuf->map,
3562                             BUS_DMASYNC_POSTWRITE);
3563                         bus_dmamap_unload(txr->txtag,
3564                             txbuf->map);
3565                         m_freem(txbuf->m_head);
3566                         txbuf->m_head = NULL;
3567                         if (txbuf->map != NULL) {
3568                                 bus_dmamap_destroy(txr->txtag,
3569                                     txbuf->map);
3570                                 txbuf->map = NULL;
3571                         }
3572                 } else if (txbuf->map != NULL) {
3573                         bus_dmamap_unload(txr->txtag,
3574                             txbuf->map);
3575                         bus_dmamap_destroy(txr->txtag,
3576                             txbuf->map);
3577                         txbuf->map = NULL;
3578                 }
3579         }
3580 #if __FreeBSD_version >= 800000
3581         if (txr->br != NULL)
3582                 buf_ring_free(txr->br, M_DEVBUF);
3583 #endif
3584         if (txr->tx_buffers != NULL) {
3585                 free(txr->tx_buffers, M_DEVBUF);
3586                 txr->tx_buffers = NULL;
3587         }
3588         if (txr->txtag != NULL) {
3589                 bus_dma_tag_destroy(txr->txtag);
3590                 txr->txtag = NULL;
3591         }
3592         return;
3593 }
3594
3595
3596 /*********************************************************************
3597  *  The offload context is protocol specific (TCP/UDP) and thus
3598  *  only needs to be set when the protocol changes. A context
3599  *  change can be a performance detriment, so offload might be
3600  *  better left disabled in such cases. The reason arises in the
3601  *  way in which the controller supports pipelined requests from
3602  *  the Tx data DMA. Up to four requests can be pipelined, and they
3603  *  may belong to the same packet or to multiple packets. However,
3604  *  all requests for one packet are issued before a request is
3605  *  issued for a subsequent packet, and if a request for the next
3606  *  packet requires a context change, that request will be stalled
3607  *  until the previous request completes. This means setting up a
3608  *  new context effectively disables pipelined Tx data DMA, which
3609  *  in turn greatly slows down performance when sending small-sized
3610  *  frames.
3611  **********************************************************************/
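/*
 * Practical consequence: a workload that interleaves, say, TCP and UDP
 * frames on one ring forces a context descriptor per frame, which is
 * exactly the stall case described above; the last_hw_* caching below
 * skips the reload when consecutive frames share the same offload layout.
 */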
3612 static void
3613 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3614     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3615 {
3616         struct adapter                  *adapter = txr->adapter;
3617         struct e1000_context_desc       *TXD = NULL;
3618         struct em_buffer                *tx_buffer;
3619         int                             cur, hdr_len;
3620         u32                             cmd = 0;
3621         u16                             offload = 0;
3622         u8                              ipcso, ipcss, tucso, tucss;
3623
3624         ipcss = ipcso = tucss = tucso = 0;
3625         hdr_len = ip_off + (ip->ip_hl << 2);
3626         cur = txr->next_avail_desc;
3627
3628         /* Setup of IP header checksum. */
3629         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3630                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3631                 offload |= CSUM_IP;
3632                 ipcss = ip_off;
3633                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3634                 /*
3635                  * Start offset for header checksum calculation.
3636                  * End offset for header checksum calculation.
3637                  * Offset of place to put the checksum.
3638                  */
3639                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3640                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3641                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3642                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3643                 cmd |= E1000_TXD_CMD_IP;
3644         }
3645
3646         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3647                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3648                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3649                 offload |= CSUM_TCP;
3650                 tucss = hdr_len;
3651                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3652                 /*
3653                  * Setting up a new checksum offload context for every frame
3654                  * takes a lot of processing time for the hardware. This also
3655                  * reduces performance a lot for small-sized frames, so avoid
3656                  * it if the driver can reuse a previously configured checksum
3657                  * offload context.
3658                  */
3659                 if (txr->last_hw_offload == offload) {
3660                         if (offload & CSUM_IP) {
3661                                 if (txr->last_hw_ipcss == ipcss &&
3662                                     txr->last_hw_ipcso == ipcso &&
3663                                     txr->last_hw_tucss == tucss &&
3664                                     txr->last_hw_tucso == tucso)
3665                                         return;
3666                         } else {
3667                                 if (txr->last_hw_tucss == tucss &&
3668                                     txr->last_hw_tucso == tucso)
3669                                         return;
3670                         }
3671                 }
3672                 txr->last_hw_offload = offload;
3673                 txr->last_hw_tucss = tucss;
3674                 txr->last_hw_tucso = tucso;
3675                 /*
3676                  * Start offset for payload checksum calculation.
3677                  * End offset for payload checksum calculation.
3678                  * Offset of place to put the checksum.
3679                  */
3680                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3681                 TXD->upper_setup.tcp_fields.tucss = tucss;
3682                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3683                 TXD->upper_setup.tcp_fields.tucso = tucso;
3684                 cmd |= E1000_TXD_CMD_TCP;
3685         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3686                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3687                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3688                 tucss = hdr_len;
3689                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3690                 /*
3691                  * Setting up a new checksum offload context for every frame
3692                  * takes a lot of processing time for the hardware. This also
3693                  * reduces performance a lot for small-sized frames, so avoid
3694                  * it if the driver can reuse a previously configured checksum
3695                  * offload context.
3696                  */
3697                 if (txr->last_hw_offload == offload) {
3698                         if (offload & CSUM_IP) {
3699                                 if (txr->last_hw_ipcss == ipcss &&
3700                                     txr->last_hw_ipcso == ipcso &&
3701                                     txr->last_hw_tucss == tucss &&
3702                                     txr->last_hw_tucso == tucso)
3703                                         return;
3704                         } else {
3705                                 if (txr->last_hw_tucss == tucss &&
3706                                     txr->last_hw_tucso == tucso)
3707                                         return;
3708                         }
3709                 }
3710                 txr->last_hw_offload = offload;
3711                 txr->last_hw_tucss = tucss;
3712                 txr->last_hw_tucso = tucso;
3713                 /*
3714                  * Start offset for payload checksum calculation.
3715                  * End offset for payload checksum calculation.
3716                  * Offset of place to put the checksum.
3717                  */
3718                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3719                 TXD->upper_setup.tcp_fields.tucss = tucss;
3720                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3721                 TXD->upper_setup.tcp_fields.tucso = tucso;
3722         }
3723   
3724         if (offload & CSUM_IP) {
3725                 txr->last_hw_ipcss = ipcss;
3726                 txr->last_hw_ipcso = ipcso;
3727         }
3728
3729         TXD->tcp_seg_setup.data = htole32(0);
3730         TXD->cmd_and_length =
3731             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3732         tx_buffer = &txr->tx_buffers[cur];
3733         tx_buffer->m_head = NULL;
3734         tx_buffer->next_eop = -1;
3735
3736         if (++cur == adapter->num_tx_desc)
3737                 cur = 0;
3738
3739         txr->tx_avail--;
3740         txr->next_avail_desc = cur;
3741 }
3742
3743
3744 /**********************************************************************
3745  *
3746  *  Setup work for hardware segmentation offload (TSO)
3747  *
3748  **********************************************************************/
3749 static void
3750 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3751     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3752 {
3753         struct adapter                  *adapter = txr->adapter;
3754         struct e1000_context_desc       *TXD;
3755         struct em_buffer                *tx_buffer;
3756         int cur, hdr_len;
3757
3758         /*
3759          * In theory we could reuse the same TSO context if and only if
3760          * the frame is the same type (IP/TCP) and has the same MSS.
3761          * However, checking whether a frame has the same IP/TCP
3762          * structure is hard to do, so just ignore that and always
3763          * establish a new TSO context.
3764          */
3765         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
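        /* e.g. a plain IPv4/TCP frame with no options: 14 (Ethernet) +
         * 20 (ip_hl = 5) + 20 (th_off = 5) = 54 bytes of header. */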
3766         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3767                       E1000_TXD_DTYP_D |        /* Data descr type */
3768                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3769
3770         /* IP and/or TCP header checksum calculation and insertion. */
3771         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3772
3773         cur = txr->next_avail_desc;
3774         tx_buffer = &txr->tx_buffers[cur];
3775         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3776
3777         /*
3778          * Start offset for header checksum calculation.
3779          * End offset for header checksum calculation.
3780          * Offset of place to put the checksum.
3781          */
3782         TXD->lower_setup.ip_fields.ipcss = ip_off;
3783         TXD->lower_setup.ip_fields.ipcse =
3784             htole16(ip_off + (ip->ip_hl << 2) - 1);
3785         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3786         /*
3787          * Start offset for payload checksum calculation.
3788          * End offset for payload checksum calculation.
3789          * Offset of place to put the checksum.
3790          */
3791         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3792         TXD->upper_setup.tcp_fields.tucse = 0;
3793         TXD->upper_setup.tcp_fields.tucso =
3794             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3795         /*
3796          * Payload size per packet w/o any headers.
3797          * Length of all headers up to payload.
3798          */
3799         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3800         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3801
3802         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3803                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3804                                 E1000_TXD_CMD_TSE |     /* TSE context */
3805                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3806                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3807                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3808
3809         tx_buffer->m_head = NULL;
3810         tx_buffer->next_eop = -1;
3811
3812         if (++cur == adapter->num_tx_desc)
3813                 cur = 0;
3814
3815         txr->tx_avail--;
3816         txr->next_avail_desc = cur;
3817         txr->tx_tso = TRUE;
3818 }
3819
3820
3821 /**********************************************************************
3822  *
3823  *  Examine each tx_buffer in the used queue. If the hardware is done
3824  *  processing the packet then free associated resources. The
3825  *  tx_buffer is put back on the free queue.
3826  *
3827  **********************************************************************/
3828 static void
3829 em_txeof(struct tx_ring *txr)
3830 {
3831         struct adapter  *adapter = txr->adapter;
3832         int first, last, done, processed;
3833         struct em_buffer *tx_buffer;
3834         struct e1000_tx_desc   *tx_desc, *eop_desc;
3835         struct ifnet   *ifp = adapter->ifp;
3836
3837         EM_TX_LOCK_ASSERT(txr);
3838 #ifdef DEV_NETMAP
3839         if (netmap_tx_irq(ifp, txr->me))
3840                 return;
3841 #endif /* DEV_NETMAP */
3842
3843         /* No work, make sure watchdog is off */
3844         if (txr->tx_avail == adapter->num_tx_desc) {
3845                 txr->queue_status = EM_QUEUE_IDLE;
3846                 return;
3847         }
3848
3849         processed = 0;
3850         first = txr->next_to_clean;
3851         tx_desc = &txr->tx_base[first];
3852         tx_buffer = &txr->tx_buffers[first];
3853         last = tx_buffer->next_eop;
3854         eop_desc = &txr->tx_base[last];
3855
3856         /*
3857          * What this does is get the index of the
3858          * first descriptor AFTER the EOP of the 
3859          * first packet, that way we can do the
3860          * simple comparison on the inner while loop.
3861          */
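        /*
         * Example: if the first packet's EOP is descriptor 9, done
         * becomes 10 and the loop below cleans descriptors first..9
         * inclusive, stopping when first reaches done.
         */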
3862         if (++last == adapter->num_tx_desc)
3863                 last = 0;
3864         done = last;
3865
3866         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3867             BUS_DMASYNC_POSTREAD);
3868
3869         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3870                 /* We clean the range of the packet */
3871                 while (first != done) {
3872                         tx_desc->upper.data = 0;
3873                         tx_desc->lower.data = 0;
3874                         tx_desc->buffer_addr = 0;
3875                         ++txr->tx_avail;
3876                         ++processed;
3877
3878                         if (tx_buffer->m_head) {
3879                                 bus_dmamap_sync(txr->txtag,
3880                                     tx_buffer->map,
3881                                     BUS_DMASYNC_POSTWRITE);
3882                                 bus_dmamap_unload(txr->txtag,
3883                                     tx_buffer->map);
3884                                 m_freem(tx_buffer->m_head);
3885                                 tx_buffer->m_head = NULL;
3886                         }
3887                         tx_buffer->next_eop = -1;
3888                         txr->watchdog_time = ticks;
3889
3890                         if (++first == adapter->num_tx_desc)
3891                                 first = 0;
3892
3893                         tx_buffer = &txr->tx_buffers[first];
3894                         tx_desc = &txr->tx_base[first];
3895                 }
3896                 ++ifp->if_opackets;
3897                 /* See if we can continue to the next packet */
3898                 last = tx_buffer->next_eop;
3899                 if (last != -1) {
3900                         eop_desc = &txr->tx_base[last];
3901                         /* Get new done point */
3902                         if (++last == adapter->num_tx_desc)
3903                                 last = 0;
3903                         done = last;
3904                 } else
3905                         break;
3906         }
3907         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3908             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3909
3910         txr->next_to_clean = first;
3911
3912         /*
3913         ** Watchdog calculation: we know there's
3914         ** work outstanding or the first return
3915         ** would have been taken, so nothing processed
3916         ** for too long indicates a hang. The local timer
3917         ** will examine this and do a reset if needed.
3918         */
3919         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3920                 txr->queue_status = EM_QUEUE_HUNG;
3921
3922         /*
3923          * If we have a minimum free, clear IFF_DRV_OACTIVE
3924          * to tell the stack that it is OK to send packets.
3925          * Notice that all writes of OACTIVE happen under the
3926          * TX lock which, with a single queue, guarantees 
3927          * sanity.
3928          */
3929         if (txr->tx_avail >= EM_MAX_SCATTER)
3930                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3931
3932         /* Disable watchdog if all clean */
3933         if (txr->tx_avail == adapter->num_tx_desc) {
3934                 txr->queue_status = EM_QUEUE_IDLE;
3935         } 
3936 }
3937
3938
3939 /*********************************************************************
3940  *
3941  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3942  *
3943  **********************************************************************/
3944 static void
3945 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3946 {
3947         struct adapter          *adapter = rxr->adapter;
3948         struct mbuf             *m;
3949         bus_dma_segment_t       segs[1];
3950         struct em_buffer        *rxbuf;
3951         int                     i, j, error, nsegs;
3952         bool                    cleaned = FALSE;
3953
3954         i = j = rxr->next_to_refresh;
3955         /*
3956         ** Get one descriptor beyond
3957         ** our work mark to control
3958         ** the loop.
3959         */
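        /*
        ** Example: with next_to_refresh = 5, i starts at 5 and j at 6;
        ** slots are refilled until j catches up with the caller's limit.
        */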
3960         if (++j == adapter->num_rx_desc)
3961                 j = 0;
3962
3963         while (j != limit) {
3964                 rxbuf = &rxr->rx_buffers[i];
3965                 if (rxbuf->m_head == NULL) {
3966                         m = m_getjcl(M_NOWAIT, MT_DATA,
3967                             M_PKTHDR, adapter->rx_mbuf_sz);
3968                         /*
3969                         ** If we have a temporary resource shortage
3970                         ** that causes a failure, just abort refresh
3971                         ** for now; we will return to this point when
3972                         ** reinvoked from em_rxeof.
3973                         */
3974                         if (m == NULL)
3975                                 goto update;
3976                 } else
3977                         m = rxbuf->m_head;
3978
3979                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3980                 m->m_flags |= M_PKTHDR;
3981                 m->m_data = m->m_ext.ext_buf;
3982
3983                 /* Use bus_dma machinery to setup the memory mapping  */
3984                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3985                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3986                 if (error != 0) {
3987                         printf("Refresh mbufs: dmamap load"
3988                             " failure - %d\n", error);
3989                         m_free(m);
3990                         rxbuf->m_head = NULL;
3991                         goto update;
3992                 }
3993                 rxbuf->m_head = m;
3994                 bus_dmamap_sync(rxr->rxtag,
3995                     rxbuf->map, BUS_DMASYNC_PREREAD);
3996                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3997                 cleaned = TRUE;
3998
3999                 i = j; /* Next is precalculated for us */
4000                 rxr->next_to_refresh = i;
4001                 /* Calculate next controlling index */
4002                 if (++j == adapter->num_rx_desc)
4003                         j = 0;
4004         }
4005 update:
4006         /*
4007         ** Update the tail pointer only if,
4008         ** and as far as we have refreshed.
4009         */
4010         if (cleaned)
4011                 E1000_WRITE_REG(&adapter->hw,
4012                     E1000_RDT(rxr->me), rxr->next_to_refresh);
4013
4014         return;
4015 }
4016
4017
4018 /*********************************************************************
4019  *
4020  *  Allocate memory for rx_buffer structures. Since we use one
4021  *  rx_buffer per received packet, the maximum number of rx_buffer's
4022  *  that we'll need is equal to the number of receive descriptors
4023  *  that we've allocated.
4024  *
4025  **********************************************************************/
4026 static int
4027 em_allocate_receive_buffers(struct rx_ring *rxr)
4028 {
4029         struct adapter          *adapter = rxr->adapter;
4030         device_t                dev = adapter->dev;
4031         struct em_buffer        *rxbuf;
4032         int                     error;
4033
4034         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4035             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4036         if (rxr->rx_buffers == NULL) {
4037                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4038                 return (ENOMEM);
4039         }
4040
4041         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4042                                 1, 0,                   /* alignment, bounds */
4043                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4044                                 BUS_SPACE_MAXADDR,      /* highaddr */
4045                                 NULL, NULL,             /* filter, filterarg */
4046                                 MJUM9BYTES,             /* maxsize */
4047                                 1,                      /* nsegments */
4048                                 MJUM9BYTES,             /* maxsegsize */
4049                                 0,                      /* flags */
4050                                 NULL,                   /* lockfunc */
4051                                 NULL,                   /* lockarg */
4052                                 &rxr->rxtag);
4053         if (error) {
4054                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4055                     __func__, error);
4056                 goto fail;
4057         }
4058
4059         for (int i = 0; i < adapter->num_rx_desc; i++) {
4060                 rxbuf = &rxr->rx_buffers[i];
4062                 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4063                 if (error) {
4064                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4065                             __func__, error);
4066                         goto fail;
4067                 }
4068         }
4069
4070         return (0);
4071
4072 fail:
4073         em_free_receive_structures(adapter);
4074         return (error);
4075 }
4076
4077
4078 /*********************************************************************
4079  *
4080  *  Initialize a receive ring and its buffers.
4081  *
4082  **********************************************************************/
4083 static int
4084 em_setup_receive_ring(struct rx_ring *rxr)
4085 {
4086         struct  adapter         *adapter = rxr->adapter;
4087         struct em_buffer        *rxbuf;
4088         bus_dma_segment_t       seg[1];
4089         int                     rsize, nsegs, error = 0;
4090 #ifdef DEV_NETMAP
4091         struct netmap_adapter *na = NA(adapter->ifp);
4092         struct netmap_slot *slot;
4093 #endif
4094
4095
4096         /* Clear the ring contents */
4097         EM_RX_LOCK(rxr);
4098         rsize = roundup2(adapter->num_rx_desc *
4099             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4100         bzero((void *)rxr->rx_base, rsize);
4101 #ifdef DEV_NETMAP
4102         slot = netmap_reset(na, NR_RX, rxr->me, 0);
4103 #endif
4104
4105         /*
4106         ** Free current RX buffer structs and their mbufs
4107         */
4108         for (int i = 0; i < adapter->num_rx_desc; i++) {
4109                 rxbuf = &rxr->rx_buffers[i];
4110                 if (rxbuf->m_head != NULL) {
4111                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4112                             BUS_DMASYNC_POSTREAD);
4113                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4114                         m_freem(rxbuf->m_head);
4115                         rxbuf->m_head = NULL; /* mark as freed */
4116                 }
4117         }
4118
4119         /* Now replenish the mbufs */
4120         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4121                 rxbuf = &rxr->rx_buffers[j];
4122 #ifdef DEV_NETMAP
4123                 if (slot) {
4124                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4125                         uint64_t paddr;
4126                         void *addr;
4127
4128                         addr = PNMB(slot + si, &paddr);
4129                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4130                         /* Update descriptor */
4131                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4132                         continue;
4133                 }
4134 #endif /* DEV_NETMAP */
4135                 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4136                     M_PKTHDR, adapter->rx_mbuf_sz);
4137                 if (rxbuf->m_head == NULL) {
4138                         error = ENOBUFS;
4139                         goto fail;
4140                 }
4141                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4142                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4143                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4144
4145                 /* Get the memory mapping */
4146                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4147                     rxbuf->map, rxbuf->m_head, seg,
4148                     &nsegs, BUS_DMA_NOWAIT);
4149                 if (error != 0) {
4150                         m_freem(rxbuf->m_head);
4151                         rxbuf->m_head = NULL;
4152                         goto fail;
4153                 }
4154                 bus_dmamap_sync(rxr->rxtag,
4155                     rxbuf->map, BUS_DMASYNC_PREREAD);
4156
4157                 /* Update descriptor */
4158                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4159         }
4160         rxr->next_to_check = 0;
4161         rxr->next_to_refresh = 0;
4162         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4163             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4164
4165 fail:
4166         EM_RX_UNLOCK(rxr);
4167         return (error);
4168 }
4169
4170 /*********************************************************************
4171  *
4172  *  Initialize all receive rings.
4173  *
4174  **********************************************************************/
4175 static int
4176 em_setup_receive_structures(struct adapter *adapter)
4177 {
4178         struct rx_ring *rxr = adapter->rx_rings;
4179         int q;
4180
4181         for (q = 0; q < adapter->num_queues; q++, rxr++)
4182                 if (em_setup_receive_ring(rxr))
4183                         goto fail;
4184
4185         return (0);
4186 fail:
4187         /*
4188          * Free the RX buffers allocated so far; we need only handle
4189          * the rings that completed, since the failing ring will have
4190          * cleaned up after itself. 'q' failed, so it's the terminus.
4191          */
4192         for (int i = 0; i < q; ++i) {
4193                 rxr = &adapter->rx_rings[i];
4194                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4195                         struct em_buffer *rxbuf;
4196                         rxbuf = &rxr->rx_buffers[n];
4197                         if (rxbuf->m_head != NULL) {
4198                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4199                                   BUS_DMASYNC_POSTREAD);
4200                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4201                                 m_freem(rxbuf->m_head);
4202                                 rxbuf->m_head = NULL;
4203                         }
4204                 }
4205                 rxr->next_to_check = 0;
4206                 rxr->next_to_refresh = 0;
4207         }
4208
4209         return (ENOBUFS);
4210 }
4211
4212 /*********************************************************************
4213  *
4214  *  Free all receive rings.
4215  *
4216  **********************************************************************/
4217 static void
4218 em_free_receive_structures(struct adapter *adapter)
4219 {
4220         struct rx_ring *rxr = adapter->rx_rings;
4221
4222         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4223                 em_free_receive_buffers(rxr);
4224                 /* Free the ring memory as well */
4225                 em_dma_free(adapter, &rxr->rxdma);
4226                 EM_RX_LOCK_DESTROY(rxr);
4227         }
4228
4229         free(adapter->rx_rings, M_DEVBUF);
4230 }
4231
4232
4233 /*********************************************************************
4234  *
4235  *  Free receive ring data structures
4236  *
4237  **********************************************************************/
4238 static void
4239 em_free_receive_buffers(struct rx_ring *rxr)
4240 {
4241         struct adapter          *adapter = rxr->adapter;
4242         struct em_buffer        *rxbuf = NULL;
4243
4244         INIT_DEBUGOUT("free_receive_buffers: begin");
4245
4246         if (rxr->rx_buffers != NULL) {
4247                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4248                         rxbuf = &rxr->rx_buffers[i];
4249                         if (rxbuf->map != NULL) {
4250                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4251                                     BUS_DMASYNC_POSTREAD);
4252                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4253                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4254                         }
4255                         if (rxbuf->m_head != NULL) {
4256                                 m_freem(rxbuf->m_head);
4257                                 rxbuf->m_head = NULL;
4258                         }
4259                 }
4260                 free(rxr->rx_buffers, M_DEVBUF);
4261                 rxr->rx_buffers = NULL;
4262                 rxr->next_to_check = 0;
4263                 rxr->next_to_refresh = 0;
4264         }
4265
4266         if (rxr->rxtag != NULL) {
4267                 bus_dma_tag_destroy(rxr->rxtag);
4268                 rxr->rxtag = NULL;
4269         }
4270
4271         return;
4272 }
4273
4274
4275 /*********************************************************************
4276  *
4277  *  Enable receive unit.
4278  *
4279  **********************************************************************/
4280
4281 static void
4282 em_initialize_receive_unit(struct adapter *adapter)
4283 {
4284         struct rx_ring  *rxr = adapter->rx_rings;
4285         struct ifnet    *ifp = adapter->ifp;
4286         struct e1000_hw *hw = &adapter->hw;
4287         u64     bus_addr;
4288         u32     rctl, rxcsum;
4289
4290         INIT_DEBUGOUT("em_initialize_receive_units: begin");
4291
4292         /*
4293          * Make sure receives are disabled while setting
4294          * up the descriptor ring
4295          */
4296         rctl = E1000_READ_REG(hw, E1000_RCTL);
4297         /* On this hardware, never disable RX once it has been enabled */
4298         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4299                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4300
4301         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4302             adapter->rx_abs_int_delay.value);
4303         /*
4304          * Set the interrupt throttling rate. Value is calculated
4305          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4306          */
4307         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
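             /*
             ** Worked example (assuming the usual MAX_INTS_PER_SEC of
             ** 8000 from if_em.h): DEFAULT_ITR = 1000000000 / (8000 * 256)
             ** = ~488; the ITR register counts in 256ns units, so
             ** 488 * 256ns = ~125us between interrupts, i.e. at most
             ** ~8000 interrupts per second.
             */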
4308
4309         /*
4310         ** When using MSIX interrupts we need to throttle
4311         ** using the EITR register (82574 only)
4312         */
4313         if (hw->mac.type == e1000_82574) {
4314                 for (int i = 0; i < 4; i++)
4315                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4316                             DEFAULT_ITR);
4317                 /* Disable accelerated acknowledge */
4318                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4319         }
4320
4321         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4322         if (ifp->if_capenable & IFCAP_RXCSUM)
4323                 rxcsum |= E1000_RXCSUM_TUOFL;
4324         else
4325                 rxcsum &= ~E1000_RXCSUM_TUOFL;
4326         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4327
4328         /*
4329         ** XXX TEMPORARY WORKAROUND: on some systems with 82573,
4330         ** such as the Lenovo X60, long latencies are observed.
4331         ** This change eliminates the problem, but since having
4332         ** positive values in RDTR is a known source of problems
4333         ** on other platforms, another solution is being sought.
4334         */
4335         if (hw->mac.type == e1000_82573)
4336                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4337
4338         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4339                 /* Setup the Base and Length of the Rx Descriptor Ring */
4340                 u32 rdt = adapter->num_rx_desc - 1; /* default */
4341
4342                 bus_addr = rxr->rxdma.dma_paddr;
4343                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4344                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4345                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4346                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4347                 /* Setup the Head and Tail Descriptor Pointers */
4348                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4349 #ifdef DEV_NETMAP
4350                 /*
4351                  * an init() while a netmap client is active must
4352                  * preserve the rx buffers passed to userspace.
4353                  */
4354                 if (ifp->if_capenable & IFCAP_NETMAP)
4355                         rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4356 #endif /* DEV_NETMAP */
4357                 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4358         }
4359
4360         /* Set PTHRESH for improved jumbo performance */
4361         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4362             (adapter->hw.mac.type == e1000_pch2lan) ||
4363             (adapter->hw.mac.type == e1000_ich10lan)) &&
4364             (ifp->if_mtu > ETHERMTU)) {
4365                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4366                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4367         }
4368                 
4369         if (adapter->hw.mac.type >= e1000_pch2lan) {
4370                 if (ifp->if_mtu > ETHERMTU)
4371                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4372                 else
4373                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4374         }
4375
4376         /* Setup the Receive Control Register */
4377         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4378         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4379             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4380             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4381
4382         /* Strip the CRC */
4383         rctl |= E1000_RCTL_SECRC;
4384
4385         /* Make sure VLAN Filters are off */
4386         rctl &= ~E1000_RCTL_VFE;
4387         rctl &= ~E1000_RCTL_SBP;
4388
4389         if (adapter->rx_mbuf_sz == MCLBYTES)
4390                 rctl |= E1000_RCTL_SZ_2048;
4391         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4392                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4393         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4394                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4395
4396         if (ifp->if_mtu > ETHERMTU)
4397                 rctl |= E1000_RCTL_LPE;
4398         else
4399                 rctl &= ~E1000_RCTL_LPE;
4400
4401         /* Write out the settings */
4402         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4403
4404         return;
4405 }
4406
4407
4408 /*********************************************************************
4409  *
4410  *  This routine executes in interrupt context. It replenishes
4411  *  the mbufs in the descriptor ring and sends data which has been
4412  *  DMA'ed into host memory to the upper layer.
4413  *
4414  *  We loop at most count times if count is > 0, or until done if
4415  *  count < 0.
4416  *  
4417  *  For polling we also return the number of packets cleaned.
4418  *********************************************************************/
4419 static bool
4420 em_rxeof(struct rx_ring *rxr, int count, int *done)
4421 {
4422         struct adapter          *adapter = rxr->adapter;
4423         struct ifnet            *ifp = adapter->ifp;
4424         struct mbuf             *mp, *sendmp;
4425         u8                      status = 0;
4426         u16                     len;
4427         int                     i, processed, rxdone = 0;
4428         bool                    eop;
4429         struct e1000_rx_desc    *cur;
4430
4431         EM_RX_LOCK(rxr);
4432
4433 #ifdef DEV_NETMAP
4434         if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4435                 EM_RX_UNLOCK(rxr);
4436                 return (FALSE);
4437         }
4438 #endif /* DEV_NETMAP */
4439
4440         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4441
4442                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4443                         break;
4444
4445                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4446                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4447
4448                 cur = &rxr->rx_base[i];
4449                 status = cur->status;
4450                 mp = sendmp = NULL;
4451
4452                 if ((status & E1000_RXD_STAT_DD) == 0)
4453                         break;
4454
4455                 len = le16toh(cur->length);
4456                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4457
4458                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4459                     (rxr->discard == TRUE)) {
4460                         adapter->dropped_pkts++;
4461                         ++rxr->rx_discarded;
4462                         if (!eop) /* Catch subsequent segs */
4463                                 rxr->discard = TRUE;
4464                         else
4465                                 rxr->discard = FALSE;
4466                         em_rx_discard(rxr, i);
4467                         goto next_desc;
4468                 }
4469                 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4470
4471                 /* Assign correct length to the current fragment */
4472                 mp = rxr->rx_buffers[i].m_head;
4473                 mp->m_len = len;
4474
4475                 /* Trigger for refresh */
4476                 rxr->rx_buffers[i].m_head = NULL;
4477
4478                 /* First segment? */
4479                 if (rxr->fmp == NULL) {
4480                         mp->m_pkthdr.len = len;
4481                         rxr->fmp = rxr->lmp = mp;
4482                 } else {
4483                         /* Chain mbuf's together */
4484                         mp->m_flags &= ~M_PKTHDR;
4485                         rxr->lmp->m_next = mp;
4486                         rxr->lmp = mp;
4487                         rxr->fmp->m_pkthdr.len += len;
4488                 }
4489
4490                 if (eop) {
4491                         --count;
4492                         sendmp = rxr->fmp;
4493                         sendmp->m_pkthdr.rcvif = ifp;
4494                         ifp->if_ipackets++;
4495                         em_receive_checksum(cur, sendmp);
4496 #ifndef __NO_STRICT_ALIGNMENT
4497                         if (adapter->hw.mac.max_frame_size >
4498                             (MCLBYTES - ETHER_ALIGN) &&
4499                             em_fixup_rx(rxr) != 0)
4500                                 goto skip;
4501 #endif
4502                         if (status & E1000_RXD_STAT_VP) {
4503                                 sendmp->m_pkthdr.ether_vtag =
4504                                     le16toh(cur->special);
4505                                 sendmp->m_flags |= M_VLANTAG;
4506                         }
4507 #ifndef __NO_STRICT_ALIGNMENT
4508 skip:
4509 #endif
4510                         rxr->fmp = rxr->lmp = NULL;
4511                 }
4512 next_desc:
4513                 /* Zero out the receive descriptors status. */
4514                 cur->status = 0;
4515                 ++rxdone;       /* cumulative for POLL */
4516                 ++processed;
4517
4518                 /* Advance our pointers to the next descriptor. */
4519                 if (++i == adapter->num_rx_desc)
4520                         i = 0;
4521
4522                 /* Send to the stack */
4523                 if (sendmp != NULL) {
4524                         rxr->next_to_check = i;
4525                         EM_RX_UNLOCK(rxr);
4526                         (*ifp->if_input)(ifp, sendmp);
4527                         EM_RX_LOCK(rxr);
4528                         i = rxr->next_to_check;
4529                 }
4530
4531                 /* Only refresh mbufs every 8 descriptors */
4532                 if (processed == 8) {
4533                         em_refresh_mbufs(rxr, i);
4534                         processed = 0;
4535                 }
4536         }
4537
4538         /* Catch any remaining refresh work */
4539         if (e1000_rx_unrefreshed(rxr))
4540                 em_refresh_mbufs(rxr, i);
4541
4542         rxr->next_to_check = i;
4543         if (done != NULL)
4544                 *done = rxdone;
4545         EM_RX_UNLOCK(rxr);
4546
4547         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4548 }
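
     /*
     ** Usage sketch (illustrative only, not driver code): a polling
     ** caller bounds the work and collects the packet count, e.g.
     **
     **      int done;
     **      em_rxeof(rxr, 100, &done);
     **
     ** processes at most 100 complete (EOP) packets and reports how
     ** many were cleaned in 'done'. A negative count never reaches
     ** zero in the loop above, so the routine instead runs until no
     ** remaining descriptor has the DD bit set.
     */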
4549
4550 static __inline void
4551 em_rx_discard(struct rx_ring *rxr, int i)
4552 {
4553         struct em_buffer        *rbuf;
4554
4555         rbuf = &rxr->rx_buffers[i];
4556         bus_dmamap_unload(rxr->rxtag, rbuf->map);
4557
4558         /* Free any previous pieces */
4559         if (rxr->fmp != NULL) {
4560                 rxr->fmp->m_flags |= M_PKTHDR;
4561                 m_freem(rxr->fmp);
4562                 rxr->fmp = NULL;
4563                 rxr->lmp = NULL;
4564         }
4565         /*
4566         ** Free buffer and allow em_refresh_mbufs()
4567         ** to clean up and recharge buffer.
4568         */
4569         if (rbuf->m_head) {
4570                 m_free(rbuf->m_head);
4571                 rbuf->m_head = NULL;
4572         }
4573         return;
4574 }
4575
4576 #ifndef __NO_STRICT_ALIGNMENT
4577 /*
4578  * When jumbo frames are enabled we should realign the entire payload on
4579  * architectures with strict alignment. This is a serious design mistake in
4580  * the 8254x, as it nullifies the gains of DMA: the 8254x only allows RX
4581  * buffer sizes of 2048/4096/8192/16384, while what we really want is
4582  * 2048 - ETHER_ALIGN, which would align the payload. On architectures
4583  * without strict alignment restrictions the 8254x still performs unaligned
4584  * memory accesses, which reduce performance as well. To avoid copying an
4585  * entire frame just to align it, we allocate a new mbuf, copy only the
4586  * ethernet header into it, and prepend the new mbuf to the existing chain.
4587  *
4588  * Be aware that the best performance of the 8254x is achieved only when
4589  * jumbo frames are not used at all on architectures with strict alignment.
4590  */
4591 static int
4592 em_fixup_rx(struct rx_ring *rxr)
4593 {
4594         struct adapter *adapter = rxr->adapter;
4595         struct mbuf *m, *n;
4596         int error;
4597
4598         error = 0;
4599         m = rxr->fmp;
4600         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4601                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4602                 m->m_data += ETHER_HDR_LEN;
4603         } else {
4604                 MGETHDR(n, M_NOWAIT, MT_DATA);
4605                 if (n != NULL) {
4606                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4607                         m->m_data += ETHER_HDR_LEN;
4608                         m->m_len -= ETHER_HDR_LEN;
4609                         n->m_len = ETHER_HDR_LEN;
4610                         M_MOVE_PKTHDR(n, m);
4611                         n->m_next = m;
4612                         rxr->fmp = n;
4613                 } else {
4614                         adapter->dropped_pkts++;
4615                         m_freem(rxr->fmp);
4616                         rxr->fmp = NULL;
4617                         error = ENOMEM;
4618                 }
4619         }
4620
4621         return (error);
4622 }
4623 #endif
4624
4625 /*********************************************************************
4626  *
4627  *  Verify that the hardware indicated that the checksum is valid.
4628  *  Inform the stack about the status of checksum so that stack
4629  *  doesn't spend time verifying the checksum.
4630  *
4631  *********************************************************************/
4632 static void
4633 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4634 {
4635         mp->m_pkthdr.csum_flags = 0;
4636
4637         /* Ignore Checksum bit is set */
4638         if (rx_desc->status & E1000_RXD_STAT_IXSM)
4639                 return;
4640
4641         if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4642                 return;
4643
4644         /* IP Checksum Good? */
4645         if (rx_desc->status & E1000_RXD_STAT_IPCS)
4646                 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4647
4648         /* TCP or UDP checksum */
4649         if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4650                 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4651                 mp->m_pkthdr.csum_data = htons(0xffff);
4652         }
4653 }
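
     /*
     ** For example, narrating the code above: a good TCP/IPv4 frame
     ** arrives with IPCS and TCPCS set and no IXSM or error bits, so
     ** the mbuf leaves here with csum_flags = CSUM_IP_CHECKED |
     ** CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR and
     ** csum_data = 0xffff, and the stack skips its software
     ** verification of both the IP header and the TCP checksum.
     */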
4654
4655 /*
4656  * This routine is run via a vlan
4657  * config EVENT
4658  */
4659 static void
4660 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4661 {
4662         struct adapter  *adapter = ifp->if_softc;
4663         u32             index, bit;
4664
4665         if (ifp->if_softc !=  arg)   /* Not our event */
4666                 return;
4667
4668         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4669                 return;
4670
4671         EM_CORE_LOCK(adapter);
4672         index = (vtag >> 5) & 0x7F;
4673         bit = vtag & 0x1F;
4674         adapter->shadow_vfta[index] |= (1 << bit);
4675         ++adapter->num_vlans;
4676         /* Re-init to load the changes */
4677         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4678                 em_init_locked(adapter);
4679         EM_CORE_UNLOCK(adapter);
4680 }
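
     /*
     ** Worked example of the VFTA indexing above: for vtag 100,
     ** index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, so the
     ** update is shadow_vfta[3] |= (1 << 4). The 4096 possible VLAN
     ** IDs thus map onto 128 32-bit words of the filter table.
     */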
4681
4682 /*
4683  * This routine is run via a vlan
4684  * unconfig EVENT
4685  */
4686 static void
4687 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4688 {
4689         struct adapter  *adapter = ifp->if_softc;
4690         u32             index, bit;
4691
4692         if (ifp->if_softc !=  arg)
4693                 return;
4694
4695         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4696                 return;
4697
4698         EM_CORE_LOCK(adapter);
4699         index = (vtag >> 5) & 0x7F;
4700         bit = vtag & 0x1F;
4701         adapter->shadow_vfta[index] &= ~(1 << bit);
4702         --adapter->num_vlans;
4703         /* Re-init to load the changes */
4704         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4705                 em_init_locked(adapter);
4706         EM_CORE_UNLOCK(adapter);
4707 }
4708
4709 static void
4710 em_setup_vlan_hw_support(struct adapter *adapter)
4711 {
4712         struct e1000_hw *hw = &adapter->hw;
4713         u32             reg;
4714
4715         /*
4716         ** We get here through init_locked, meaning
4717         ** a soft reset; this has already cleared
4718         ** the VFTA and other state, so if no
4719         ** vlans have been registered, do nothing.
4720         */
4721         if (adapter->num_vlans == 0)
4722                 return;
4723
4724         /*
4725         ** A soft reset zeroes out the VFTA, so
4726         ** we need to repopulate it now.
4727         */
4728         for (int i = 0; i < EM_VFTA_SIZE; i++)
4729                 if (adapter->shadow_vfta[i] != 0)
4730                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4731                             i, adapter->shadow_vfta[i]);
4732
4733         reg = E1000_READ_REG(hw, E1000_CTRL);
4734         reg |= E1000_CTRL_VME;
4735         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4736
4737         /* Enable the Filter Table */
4738         reg = E1000_READ_REG(hw, E1000_RCTL);
4739         reg &= ~E1000_RCTL_CFIEN;
4740         reg |= E1000_RCTL_VFE;
4741         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4742 }
4743
4744 static void
4745 em_enable_intr(struct adapter *adapter)
4746 {
4747         struct e1000_hw *hw = &adapter->hw;
4748         u32 ims_mask = IMS_ENABLE_MASK;
4749
4750         if (hw->mac.type == e1000_82574) {
4751                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4752                 ims_mask |= EM_MSIX_MASK;
4753         } 
4754         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4755 }
4756
4757 static void
4758 em_disable_intr(struct adapter *adapter)
4759 {
4760         struct e1000_hw *hw = &adapter->hw;
4761
4762         if (hw->mac.type == e1000_82574)
4763                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4764         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4765 }
4766
4767 /*
4768  * Bit of a misnomer: what this really means is
4769  * to enable OS management of the system, i.e.
4770  * to disable the special hardware management features.
4771  */
4772 static void
4773 em_init_manageability(struct adapter *adapter)
4774 {
4775         /* A shared code workaround */
4776 #define E1000_82542_MANC2H E1000_MANC2H
4777         if (adapter->has_manage) {
4778                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4779                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4780
4781                 /* disable hardware interception of ARP */
4782                 manc &= ~(E1000_MANC_ARP_EN);
4783
4784                 /* enable receiving management packets to the host */
4785                 manc |= E1000_MANC_EN_MNG2HOST;
4786 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4787 #define E1000_MNG2HOST_PORT_664 (1 << 6)
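                /*
                ** Ports 623 and 664 are the standard RMCP management
                ** ports (623 for ASF/RMCP, 664 for secure RMCP), so
                ** management packets on them are steered to the host.
                */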
4788                 manc2h |= E1000_MNG2HOST_PORT_623;
4789                 manc2h |= E1000_MNG2HOST_PORT_664;
4790                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4791                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4792         }
4793 }
4794
4795 /*
4796  * Give control back to hardware management
4797  * controller if there is one.
4798  */
4799 static void
4800 em_release_manageability(struct adapter *adapter)
4801 {
4802         if (adapter->has_manage) {
4803                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4804
4805                 /* re-enable hardware interception of ARP */
4806                 manc |= E1000_MANC_ARP_EN;
4807                 manc &= ~E1000_MANC_EN_MNG2HOST;
4808
4809                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4810         }
4811 }
4812
4813 /*
4814  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4815  * For ASF and Pass Through versions of f/w this means
4816  * that the driver is loaded. For AMT version type f/w
4817  * this means that the network i/f is open.
4818  */
4819 static void
4820 em_get_hw_control(struct adapter *adapter)
4821 {
4822         u32 ctrl_ext, swsm;
4823
4824         if (adapter->hw.mac.type == e1000_82573) {
4825                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4826                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4827                     swsm | E1000_SWSM_DRV_LOAD);
4828                 return;
4829         }
4830         /* else */
4831         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4832         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4833             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4834         return;
4835 }
4836
4837 /*
4838  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4839  * For ASF and Pass Through versions of f/w this means that
4840  * the driver is no longer loaded. For AMT versions of the
4841  * f/w this means that the network i/f is closed.
4842  */
4843 static void
4844 em_release_hw_control(struct adapter *adapter)
4845 {
4846         u32 ctrl_ext, swsm;
4847
4848         if (!adapter->has_manage)
4849                 return;
4850
4851         if (adapter->hw.mac.type == e1000_82573) {
4852                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4853                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4854                     swsm & ~E1000_SWSM_DRV_LOAD);
4855                 return;
4856         }
4857         /* else */
4858         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4859         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4860             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4861         return;
4862 }
4863
4864 static int
4865 em_is_valid_ether_addr(u8 *addr)
4866 {
4867         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4868
4869         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4870                 return (FALSE);
4871         }
4872
4873         return (TRUE);
4874 }
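
     /*
     ** For example: 01:00:5e:00:00:01 is rejected because the low bit
     ** of the first octet marks a multicast address, and an all-zero
     ** address fails the bcmp() against zero_addr; any other unicast
     ** address passes.
     */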
4875
4876 /*
4877 ** Parse the interface capabilities with regard
4878 ** to both system management and wake-on-lan for
4879 ** later use.
4880 */
4881 static void
4882 em_get_wakeup(device_t dev)
4883 {
4884         struct adapter  *adapter = device_get_softc(dev);
4885         u16             eeprom_data = 0, device_id, apme_mask;
4886
4887         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4888         apme_mask = EM_EEPROM_APME;
4889
4890         switch (adapter->hw.mac.type) {
4891         case e1000_82573:
4892         case e1000_82583:
4893                 adapter->has_amt = TRUE;
4894                 /* Falls thru */
4895         case e1000_82571:
4896         case e1000_82572:
4897         case e1000_80003es2lan:
4898                 if (adapter->hw.bus.func == 1) {
4899                         e1000_read_nvm(&adapter->hw,
4900                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4901                         break;
4902                 } else
4903                         e1000_read_nvm(&adapter->hw,
4904                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4905                 break;
4906         case e1000_ich8lan:
4907         case e1000_ich9lan:
4908         case e1000_ich10lan:
4909         case e1000_pchlan:
4910         case e1000_pch2lan:
4911                 apme_mask = E1000_WUC_APME;
4912                 adapter->has_amt = TRUE;
4913                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4914                 break;
4915         default:
4916                 e1000_read_nvm(&adapter->hw,
4917                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4918                 break;
4919         }
4920         if (eeprom_data & apme_mask)
4921                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4922         /*
4923          * We have the eeprom settings, now apply the special cases
4924          * where the eeprom may be wrong or the board won't support
4925          * wake on lan on a particular port
4926          */
4927         device_id = pci_get_device(dev);
4928         switch (device_id) {
4929         case E1000_DEV_ID_82571EB_FIBER:
4930                 /* Wake events only supported on port A for dual fiber
4931                  * regardless of eeprom setting */
4932                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4933                     E1000_STATUS_FUNC_1)
4934                         adapter->wol = 0;
4935                 break;
4936         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4937         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4938         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4939                 /* if quad port adapter, disable WoL on all but port A */
4940                 if (global_quad_port_a != 0)
4941                         adapter->wol = 0;
4942                 /* Reset for multiple quad port adapters */
4943                 if (++global_quad_port_a == 4)
4944                         global_quad_port_a = 0;
4945                 break;
4946         }
4947         return;
4948 }
4949
4950
4951 /*
4952  * Enable PCI Wake On Lan capability
4953  */
4954 static void
4955 em_enable_wakeup(device_t dev)
4956 {
4957         struct adapter  *adapter = device_get_softc(dev);
4958         struct ifnet    *ifp = adapter->ifp;
4959         u32             pmc, ctrl, ctrl_ext, rctl;
4960         u16             status;
4961
4962         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4963                 return;
4964
4965         /* Advertise the wakeup capability */
4966         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4967         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4968         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4969         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4970
4971         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4972             (adapter->hw.mac.type == e1000_pchlan) ||
4973             (adapter->hw.mac.type == e1000_ich9lan) ||
4974             (adapter->hw.mac.type == e1000_ich10lan))
4975                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4976
4977         /* Keep the laser running on Fiber adapters */
4978         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4979             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4980                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4981                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4982                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4983         }
4984
4985         /*
4986         ** Determine type of Wakeup: note that wol
4987         ** is set with all bits on by default.
4988         */
4989         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4990                 adapter->wol &= ~E1000_WUFC_MAG;
4991
4992         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4993                 adapter->wol &= ~E1000_WUFC_MC;
4994         else {
4995                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4996                 rctl |= E1000_RCTL_MPE;
4997                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4998         }
4999
5000         if ((adapter->hw.mac.type == e1000_pchlan) ||
5001             (adapter->hw.mac.type == e1000_pch2lan)) {
5002                 if (em_enable_phy_wakeup(adapter))
5003                         return;
5004         } else {
5005                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5006                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5007         }
5008
5009         if (adapter->hw.phy.type == e1000_phy_igp_3)
5010                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5011
5012         /* Request PME */
5013         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5014         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5015         if (ifp->if_capenable & IFCAP_WOL)
5016                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5017         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5018
5019         return;
5020 }
5021
5022 /*
5023 ** WOL in the newer chipset interfaces (pchlan)
5024 ** requires things to be copied into the PHY
5025 */
5026 static int
5027 em_enable_phy_wakeup(struct adapter *adapter)
5028 {
5029         struct e1000_hw *hw = &adapter->hw;
5030         u32 mreg, ret = 0;
5031         u16 preg;
5032
5033         /* copy MAC RARs to PHY RARs */
5034         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5035
5036         /* copy MAC MTA to PHY MTA */
5037         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5038                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5039                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5040                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5041                     (u16)((mreg >> 16) & 0xFFFF));
5042         }
5043
5044         /* configure PHY Rx Control register */
5045         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5046         mreg = E1000_READ_REG(hw, E1000_RCTL);
5047         if (mreg & E1000_RCTL_UPE)
5048                 preg |= BM_RCTL_UPE;
5049         if (mreg & E1000_RCTL_MPE)
5050                 preg |= BM_RCTL_MPE;
5051         preg &= ~(BM_RCTL_MO_MASK);
5052         if (mreg & E1000_RCTL_MO_3)
5053                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5054                                 << BM_RCTL_MO_SHIFT);
5055         if (mreg & E1000_RCTL_BAM)
5056                 preg |= BM_RCTL_BAM;
5057         if (mreg & E1000_RCTL_PMCF)
5058                 preg |= BM_RCTL_PMCF;
5059         mreg = E1000_READ_REG(hw, E1000_CTRL);
5060         if (mreg & E1000_CTRL_RFCE)
5061                 preg |= BM_RCTL_RFCE;
5062         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5063
5064         /* enable PHY wakeup in MAC register */
5065         E1000_WRITE_REG(hw, E1000_WUC,
5066             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5067         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5068
5069         /* configure and enable PHY wakeup in PHY registers */
5070         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5071         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5072
5073         /* activate PHY wakeup */
5074         ret = hw->phy.ops.acquire(hw);
5075         if (ret) {
5076                 printf("Could not acquire PHY\n");
5077                 return ret;
5078         }
5079         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5080                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5081         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5082         if (ret) {
5083                 printf("Could not read PHY page 769\n");
5084                 goto out;
5085         }
5086         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5087         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5088         if (ret)
5089                 printf("Could not set PHY Host Wakeup bit\n");
5090 out:
5091         hw->phy.ops.release(hw);
5092
5093         return ret;
5094 }
5095
5096 static void
5097 em_led_func(void *arg, int onoff)
5098 {
5099         struct adapter  *adapter = arg;
5100  
5101         EM_CORE_LOCK(adapter);
5102         if (onoff) {
5103                 e1000_setup_led(&adapter->hw);
5104                 e1000_led_on(&adapter->hw);
5105         } else {
5106                 e1000_led_off(&adapter->hw);
5107                 e1000_cleanup_led(&adapter->hw);
5108         }
5109         EM_CORE_UNLOCK(adapter);
5110 }
5111
5112 /*
5113 ** Disable the L0S and L1 LINK states
5114 */
5115 static void
5116 em_disable_aspm(struct adapter *adapter)
5117 {
5118         int             base, reg;
5119         u16             link_cap, link_ctrl;
5120         device_t        dev = adapter->dev;
5121
5122         switch (adapter->hw.mac.type) {
5123                 case e1000_82573:
5124                 case e1000_82574:
5125                 case e1000_82583:
5126                         break;
5127                 default:
5128                         return;
5129         }
5130         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5131                 return;
5132         reg = base + PCIER_LINK_CAP;
5133         link_cap = pci_read_config(dev, reg, 2);
5134         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5135                 return;
5136         reg = base + PCIER_LINK_CTL;
5137         link_ctrl = pci_read_config(dev, reg, 2);
5138         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5139         pci_write_config(dev, reg, link_ctrl, 2);
5140         return;
5141 }
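
     /*
     ** Note on the write above: the ASPMC field is the low two bits of
     ** the PCIe Link Control register (bit 0 enables L0s, bit 1 enables
     ** L1), so clearing PCIEM_LINK_CTL_ASPMC keeps the link in L0.
     */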
5142
5143 /**********************************************************************
5144  *
5145  *  Update the board statistics counters.
5146  *
5147  **********************************************************************/
5148 static void
5149 em_update_stats_counters(struct adapter *adapter)
5150 {
5151         struct ifnet   *ifp;
5152
5153         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5154            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5155                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5156                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5157         }
5158         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5159         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5160         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5161         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5162
5163         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5164         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5165         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5166         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5167         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5168         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5169         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5170         /*
5171         ** For watchdog management we need to know if we have been
5172         ** paused during the last interval, so capture that here.
5173         */
5174         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5175         adapter->stats.xoffrxc += adapter->pause_frames;
5176         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5177         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5178         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5179         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5180         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5181         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5182         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5183         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5184         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5185         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5186         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5187         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5188
5189         /* For the 64-bit byte counters the low dword must be read first. */
5190         /* Both registers clear on the read of the high dword */
5191
5192         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5193             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5194         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5195             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5196
5197         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5198         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5199         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5200         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5201         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5202
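        /*
        ** Note: unlike the GORC/GOTC pairs above, TORH and TOTH are
        ** read here without first reading the low dwords; per the note
        ** above, only the high halves are accumulated. This matches
        ** the driver's historical behavior and is left unchanged.
        */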
5203         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5204         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5205
5206         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5207         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5208         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5209         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5210         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5211         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5212         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5213         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5214         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5215         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5216
5217         /* Interrupt Counts */
5218
5219         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5220         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5221         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5222         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5223         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5224         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5225         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5226         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5227         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5228
5229         if (adapter->hw.mac.type >= e1000_82543) {
5230                 adapter->stats.algnerrc += 
5231                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5232                 adapter->stats.rxerrc += 
5233                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5234                 adapter->stats.tncrs += 
5235                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5236                 adapter->stats.cexterr += 
5237                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5238                 adapter->stats.tsctc += 
5239                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5240                 adapter->stats.tsctfc += 
5241                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5242         }
5243         ifp = adapter->ifp;
5244
5245         ifp->if_collisions = adapter->stats.colc;
5246
5247         /* Rx Errors */
5248         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5249             adapter->stats.crcerrs + adapter->stats.algnerrc +
5250             adapter->stats.ruc + adapter->stats.roc +
5251             adapter->stats.mpc + adapter->stats.cexterr;
5252
5253         /* Tx Errors */
5254         ifp->if_oerrors = adapter->stats.ecol +
5255             adapter->stats.latecol + adapter->watchdog_events;
5256 }
5257
5258 /* Export a single 32-bit register via a read-only sysctl. */
5259 static int
5260 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5261 {
5262         struct adapter *adapter;
5263         u_int val;
5264
5265         adapter = oidp->oid_arg1;
5266         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5267         return (sysctl_handle_int(oidp, &val, 0, req));
5268 }
5269
5270 /*
5271  * Add sysctl variables, one per statistic, to the system.
5272  */
5273 static void
5274 em_add_hw_stats(struct adapter *adapter)
5275 {
5276         device_t dev = adapter->dev;
5277
5278         struct tx_ring *txr = adapter->tx_rings;
5279         struct rx_ring *rxr = adapter->rx_rings;
5280
5281         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5282         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5283         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5284         struct e1000_hw_stats *stats = &adapter->stats;
5285
5286         struct sysctl_oid *stat_node, *queue_node, *int_node;
5287         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5288
5289 #define QUEUE_NAME_LEN 32
5290         char namebuf[QUEUE_NAME_LEN];
5291         
5292         /* Driver Statistics */
5293         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5294                         CTLFLAG_RD, &adapter->link_irq,
5295                         "Link MSIX IRQ Handled");
5296         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5297                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5298                          "Std mbuf allocation failures");
5299         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5300                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5301                          "Std mbuf cluster allocation failures");
5302         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5303                         CTLFLAG_RD, &adapter->dropped_pkts,
5304                         "Driver dropped packets");
5305         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5306                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5307                         "Driver tx dma failure in xmit");
5308         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5309                         CTLFLAG_RD, &adapter->rx_overruns,
5310                         "RX overruns");
5311         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5312                         CTLFLAG_RD, &adapter->watchdog_events,
5313                         "Watchdog timeouts");
5314         
5315         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5316                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5317                         em_sysctl_reg_handler, "IU",
5318                         "Device Control Register");
5319         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5320                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5321                         em_sysctl_reg_handler, "IU",
5322                         "Receiver Control Register");
5323         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5324                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5325                         "Flow Control High Watermark");
5326         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5327                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5328                         "Flow Control Low Watermark");
5329
5330         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5331                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5332                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5333                                             CTLFLAG_RD, NULL, "Queue Name");
5334                 queue_list = SYSCTL_CHILDREN(queue_node);
5335
5336                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5337                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5338                                 E1000_TDH(txr->me),
5339                                 em_sysctl_reg_handler, "IU",
5340                                 "Transmit Descriptor Head");
5341                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5342                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5343                                 E1000_TDT(txr->me),
5344                                 em_sysctl_reg_handler, "IU",
5345                                 "Transmit Descriptor Tail");
5346                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5347                                 CTLFLAG_RD, &txr->tx_irq,
5348                                 "Queue MSI-X Transmit Interrupts");
5349                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5350                                 CTLFLAG_RD, &txr->no_desc_avail,
5351                                 "Queue No Descriptor Available");
5352                 
5353                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5354                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5355                                 E1000_RDH(rxr->me),
5356                                 em_sysctl_reg_handler, "IU",
5357                                 "Receive Descriptor Head");
5358                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5359                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5360                                 E1000_RDT(rxr->me),
5361                                 em_sysctl_reg_handler, "IU",
5362                                 "Receive Descriptor Tail");
5363                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5364                                 CTLFLAG_RD, &rxr->rx_irq,
5365                                 "Queue MSI-X Receive Interrupts");
5366         }
5367
5368         /* MAC stats get their own sub node */
5369
5370         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5371                                     CTLFLAG_RD, NULL, "Statistics");
5372         stat_list = SYSCTL_CHILDREN(stat_node);
5373
5374         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5375                         CTLFLAG_RD, &stats->ecol,
5376                         "Excessive collisions");
5377         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5378                         CTLFLAG_RD, &stats->scc,
5379                         "Single collisions");
5380         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5381                         CTLFLAG_RD, &stats->mcc,
5382                         "Multiple collisions");
5383         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5384                         CTLFLAG_RD, &stats->latecol,
5385                         "Late collisions");
5386         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5387                         CTLFLAG_RD, &stats->colc,
5388                         "Collision Count");
5389         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5390                         CTLFLAG_RD, &adapter->stats.symerrs,
5391                         "Symbol Errors");
5392         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5393                         CTLFLAG_RD, &adapter->stats.sec,
5394                         "Sequence Errors");
5395         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5396                         CTLFLAG_RD, &adapter->stats.dc,
5397                         "Defer Count");
5398         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5399                         CTLFLAG_RD, &adapter->stats.mpc,
5400                         "Missed Packets");
5401         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5402                         CTLFLAG_RD, &adapter->stats.rnbc,
5403                         "Receive No Buffers");
5404         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5405                         CTLFLAG_RD, &adapter->stats.ruc,
5406                         "Receive Undersize");
5407         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5408                         CTLFLAG_RD, &adapter->stats.rfc,
5409                         "Fragmented Packets Received");
5410         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5411                         CTLFLAG_RD, &adapter->stats.roc,
5412                         "Oversized Packets Received");
5413         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5414                         CTLFLAG_RD, &adapter->stats.rjc,
5415                         "Received Jabber");
5416         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5417                         CTLFLAG_RD, &adapter->stats.rxerrc,
5418                         "Receive Errors");
5419         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5420                         CTLFLAG_RD, &adapter->stats.crcerrs,
5421                         "CRC errors");
5422         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5423                         CTLFLAG_RD, &adapter->stats.algnerrc,
5424                         "Alignment Errors");
5425         /* On 82575 these are collision counts */
5426         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5427                         CTLFLAG_RD, &adapter->stats.cexterr,
5428                         "Collision/Carrier extension errors");
5429         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5430                         CTLFLAG_RD, &adapter->stats.xonrxc,
5431                         "XON Received");
5432         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5433                         CTLFLAG_RD, &adapter->stats.xontxc,
5434                         "XON Transmitted");
5435         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5436                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5437                         "XOFF Received");
5438         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5439                         CTLFLAG_RD, &adapter->stats.xofftxc,
5440                         "XOFF Transmitted");
5441
5442         /* Packet Reception Stats */
5443         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5444                         CTLFLAG_RD, &adapter->stats.tpr,
5445                         "Total Packets Received");
5446         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5447                         CTLFLAG_RD, &adapter->stats.gprc,
5448                         "Good Packets Received");
5449         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5450                         CTLFLAG_RD, &adapter->stats.bprc,
5451                         "Broadcast Packets Received");
5452         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5453                         CTLFLAG_RD, &adapter->stats.mprc,
5454                         "Multicast Packets Received");
5455         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5456                         CTLFLAG_RD, &adapter->stats.prc64,
5457                         "64 byte frames received");
5458         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5459                         CTLFLAG_RD, &adapter->stats.prc127,
5460                         "65-127 byte frames received");
5461         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5462                         CTLFLAG_RD, &adapter->stats.prc255,
5463                         "128-255 byte frames received");
5464         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5465                         CTLFLAG_RD, &adapter->stats.prc511,
5466                         "256-511 byte frames received");
5467         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5468                         CTLFLAG_RD, &adapter->stats.prc1023,
5469                         "512-1023 byte frames received");
5470         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5471                         CTLFLAG_RD, &adapter->stats.prc1522,
5472                         "1024-1522 byte frames received");
5473         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5474                         CTLFLAG_RD, &adapter->stats.gorc,
5475                         "Good Octets Received");
5476
5477         /* Packet Transmission Stats */
5478         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5479                         CTLFLAG_RD, &adapter->stats.gotc,
5480                         "Good Octets Transmitted");
5481         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5482                         CTLFLAG_RD, &adapter->stats.tpt,
5483                         "Total Packets Transmitted");
5484         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5485                         CTLFLAG_RD, &adapter->stats.gptc,
5486                         "Good Packets Transmitted");
5487         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5488                         CTLFLAG_RD, &adapter->stats.bptc,
5489                         "Broadcast Packets Transmitted");
5490         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5491                         CTLFLAG_RD, &adapter->stats.mptc,
5492                         "Multicast Packets Transmitted");
5493         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5494                         CTLFLAG_RD, &adapter->stats.ptc64,
5495                         "64 byte frames transmitted");
5496         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5497                         CTLFLAG_RD, &adapter->stats.ptc127,
5498                         "65-127 byte frames transmitted");
5499         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5500                         CTLFLAG_RD, &adapter->stats.ptc255,
5501                         "128-255 byte frames transmitted");
5502         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5503                         CTLFLAG_RD, &adapter->stats.ptc511,
5504                         "256-511 byte frames transmitted");
5505         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5506                         CTLFLAG_RD, &adapter->stats.ptc1023,
5507                         "512-1023 byte frames transmitted");
5508         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5509                         CTLFLAG_RD, &adapter->stats.ptc1522,
5510                         "1024-1522 byte frames transmitted");
5511         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5512                         CTLFLAG_RD, &adapter->stats.tsctc,
5513                         "TSO Contexts Transmitted");
5514         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5515                         CTLFLAG_RD, &adapter->stats.tsctfc,
5516                         "TSO Contexts Failed");
5517
5518
5519         /* Interrupt Stats */
5520
5521         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5522                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5523         int_list = SYSCTL_CHILDREN(int_node);
5524
5525         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5526                         CTLFLAG_RD, &adapter->stats.iac,
5527                         "Interrupt Assertion Count");
5528
5529         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5530                         CTLFLAG_RD, &adapter->stats.icrxptc,
5531                         "Interrupt Cause Rx Pkt Timer Expire Count");
5532
5533         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5534                         CTLFLAG_RD, &adapter->stats.icrxatc,
5535                         "Interrupt Cause Rx Abs Timer Expire Count");
5536
5537         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5538                         CTLFLAG_RD, &adapter->stats.ictxptc,
5539                         "Interrupt Cause Tx Pkt Timer Expire Count");
5540
5541         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5542                         CTLFLAG_RD, &adapter->stats.ictxatc,
5543                         "Interrupt Cause Tx Abs Timer Expire Count");
5544
5545         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5546                         CTLFLAG_RD, &adapter->stats.ictxqec,
5547                         "Interrupt Cause Tx Queue Empty Count");
5548
5549         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5550                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5551                         "Interrupt Cause Tx Queue Min Thresh Count");
5552
5553         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5554                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5555                         "Interrupt Cause Rx Desc Min Thresh Count");
5556
5557         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5558                         CTLFLAG_RD, &adapter->stats.icrxoc,
5559                         "Interrupt Cause Receiver Overrun Count");
5560 }
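
/*
 * Usage note (unit number illustrative): the nodes registered above hang
 * off this device's sysctl tree, so from userland they read as, e.g.:
 *
 *   # sysctl dev.em.0.queue0.txd_head
 *   # sysctl dev.em.0.mac_stats.good_pkts_recvd
 *   # sysctl dev.em.0.interrupts.asserts
 */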
5561
5562 /**********************************************************************
5563  *
5564  *  This routine provides a way to dump out the adapter eeprom,
5565  *  often a useful debug/service tool. It dumps only the first
5566  *  32 words; the data that matters lives within that range.
5567  *
5568  **********************************************************************/
5569 static int
5570 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5571 {
5572         struct adapter *adapter = (struct adapter *)arg1;
5573         int error;
5574         int result;
5575
5576         result = -1;
5577         error = sysctl_handle_int(oidp, &result, 0, req);
5578
5579         if (error || !req->newptr)
5580                 return (error);
5581
5582         /*
5583          * This value will cause a hex dump of the
5584          * first 32 16-bit words of the EEPROM to
5585          * the screen.
5586          */
5587         if (result == 1)
5588                 em_print_nvm_info(adapter);
5589
5590         return (error);
5591 }
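
/*
 * Triggered from userland by writing 1 to the OID this handler is
 * registered under (OID name assumed here, typically dev.em.<unit>.nvm):
 *
 *   # sysctl dev.em.0.nvm=1
 */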
5592
5593 static void
5594 em_print_nvm_info(struct adapter *adapter)
5595 {
5596         u16     eeprom_data;
5597         int     i, j, row = 0;
5598
5599         /* It's a bit crude, but it gets the job done */
5600         printf("\nInterface EEPROM Dump:\n");
5601         printf("Offset\n0x0000  ");
5602         for (i = 0, j = 0; i < 32; i++, j++) {
5603                 if (j == 8) { /* Make the offset block */
5604                         j = 0; ++row;
5605                         printf("\n0x00%x0  ", row);
5606                 }
5607                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5608                 printf("%04x ", eeprom_data);
5609         }
5610         printf("\n");
5611 }
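
/*
 * Illustrative output shape (values are examples only). Each row holds
 * eight 16-bit words, so the printed offsets advance in byte units:
 *
 *   Interface EEPROM Dump:
 *   Offset
 *   0x0000  00a0 c911 2233 4455 ....
 *   0x0010  ....
 */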
5612
5613 static int
5614 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5615 {
5616         struct em_int_delay_info *info;
5617         struct adapter *adapter;
5618         u32 regval;
5619         int error, usecs, ticks;
5620
5621         info = (struct em_int_delay_info *)arg1;
5622         usecs = info->value;
5623         error = sysctl_handle_int(oidp, &usecs, 0, req);
5624         if (error != 0 || req->newptr == NULL)
5625                 return (error);
5626         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5627                 return (EINVAL);
5628         info->value = usecs;
5629         ticks = EM_USECS_TO_TICKS(usecs);
5630         if (info->offset == E1000_ITR)  /* units are 256ns here */
5631                 ticks *= 4;
5632
5633         adapter = info->adapter;
5634         
5635         EM_CORE_LOCK(adapter);
5636         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5637         regval = (regval & ~0xffff) | (ticks & 0xffff);
5638         /* Handle a few special cases. */
5639         switch (info->offset) {
5640         case E1000_RDTR:
5641                 break;
5642         case E1000_TIDV:
5643                 if (ticks == 0) {
5644                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5645                         /* Don't write 0 into the TIDV register. */
5646                         regval++;
5647                 } else
5648                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5649                 break;
5650         }
5651         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5652         EM_CORE_UNLOCK(adapter);
5653         return (0);
5654 }
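
/*
 * Worked example (assuming the usual 1.024us tick conversion macros in
 * if_em.h): a request of 100us maps to EM_USECS_TO_TICKS(100) == 98
 * ticks; for E1000_ITR, whose hardware unit is 256ns, that tick count
 * is then scaled by 4 before being written back.
 */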
5655
5656 static void
5657 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5658         const char *description, struct em_int_delay_info *info,
5659         int offset, int value)
5660 {
5661         info->adapter = adapter;
5662         info->offset = offset;
5663         info->value = value;
5664         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5665             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5666             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5667             info, 0, em_sysctl_int_delay, "I", description);
5668 }
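
/*
 * Typical call, shown for context only; the authoritative invocations
 * live in em_attach() earlier in this file:
 *
 *   em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *       "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *       E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */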
5669
5670 static void
5671 em_set_sysctl_value(struct adapter *adapter, const char *name,
5672         const char *description, int *limit, int value)
5673 {
5674         *limit = value;
5675         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5676             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5677             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5678 }
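
/*
 * Typical call (illustrative; mirrors how em_attach() seeds tunables):
 *
 *   em_set_sysctl_value(adapter, "rx_processing_limit",
 *       "max number of rx packets to process",
 *       &adapter->rx_process_limit, em_rx_process_limit);
 */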
5679
5680
5681 /*
5682 ** Set flow control using sysctl:
5683 ** Flow control values:
5684 **      0 - off
5685 **      1 - rx pause
5686 **      2 - tx pause
5687 **      3 - full
5688 */
5689 static int
5690 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5691 {
5692         int             error, input;
5693         struct adapter  *adapter = (struct adapter *) arg1;
5694
5695         input = adapter->fc;    /* start from the currently set mode */
5696         error = sysctl_handle_int(oidp, &input, 0, req);
5697     
5698         if (error || req->newptr == NULL)
5699                 return (error);
5700                 
5701         if (input == adapter->fc) /* no change? */
5702                 return (error);
5703
5704         switch (input) {
5705         case e1000_fc_rx_pause:
5706         case e1000_fc_tx_pause:
5707         case e1000_fc_full:
5708         case e1000_fc_none:
5709                 adapter->hw.fc.requested_mode = input;
5710                 adapter->fc = input;
5711                 break;
5712         default:
5713                 /* Do nothing */
5714                 return (error);
5715         }
5716
5717         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5718         e1000_force_mac_fc(&adapter->hw);
5719         return (error);
5720 }
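
/*
 * Example (OID name and unit assumed, typically dev.em.<unit>.fc):
 * disable pause frames entirely:
 *
 *   # sysctl dev.em.0.fc=0
 */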
5721
5722 /*
5723 ** Manage Energy Efficient Ethernet:
5724 ** Control values:
5725 **     0 - EEE enabled, 1 - EEE disabled
5726 */
5727 static int
5728 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5729 {
5730         struct adapter  *adapter = (struct adapter *) arg1;
5731         int             error, value;
5732
5733         value = adapter->hw.dev_spec.ich8lan.eee_disable;
5734         error = sysctl_handle_int(oidp, &value, 0, req);
5735         if (error || req->newptr == NULL)
5736                 return (error);
5737         EM_CORE_LOCK(adapter);
5738         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5739         em_init_locked(adapter);
5740         EM_CORE_UNLOCK(adapter);
5741         return (0);
5742 }
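
/*
 * Note the polarity: the value stored is the hardware's eee_disable
 * flag, so writing 1 disables EEE. Example (OID name and unit assumed):
 *
 *   # sysctl dev.em.0.eee_control=1
 */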
5743
5744 static int
5745 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5746 {
5747         struct adapter *adapter;
5748         int error;
5749         int result;
5750
5751         result = -1;
5752         error = sysctl_handle_int(oidp, &result, 0, req);
5753
5754         if (error || !req->newptr)
5755                 return (error);
5756
5757         if (result == 1) {
5758                 adapter = (struct adapter *)arg1;
5759                 em_print_debug_info(adapter);
5760         }
5761
5762         return (error);
5763 }
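
/*
 * Like the NVM handler above, this fires on a write of 1 (OID name
 * assumed, typically dev.em.<unit>.debug):
 *
 *   # sysctl dev.em.0.debug=1
 */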
5764
5765 /*
5766 ** This routine is meant to be fluid, add whatever is
5767 ** needed for debugging a problem.  -jfv
5768 */
5769 static void
5770 em_print_debug_info(struct adapter *adapter)
5771 {
5772         device_t dev = adapter->dev;
5773         struct tx_ring *txr = adapter->tx_rings;
5774         struct rx_ring *rxr = adapter->rx_rings;
5775
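        /* Only the first TX/RX ring (queue 0) is reported below. */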
5776         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5777                 printf("Interface is RUNNING ");
5778         else
5779                 printf("Interface is NOT RUNNING ");
5780
5781         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5782                 printf("and INACTIVE\n");
5783         else
5784                 printf("and ACTIVE\n");
5785
5786         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5787             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5788             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5789         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5790             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5791             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5792         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5793         device_printf(dev, "TX descriptors avail = %d\n",
5794             txr->tx_avail);
5795         device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5796             txr->no_desc_avail);
5797         device_printf(dev, "RX discarded packets = %ld\n",
5798             rxr->rx_discarded);
5799         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5800         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5801 }