/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66
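
/*
 * The hardware delay registers these map to (TIDV, RDTR, TADV, RADV)
 * count in units of 1.024 usecs per the Intel datasheets, hence the
 * rounded conversions above. A quick worked example:
 *
 *      EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks
 *      EM_TICKS_TO_USECS(98)  = (1024 * 98 + 500) / 1000  = 100 usecs
 */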

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");
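
/*
 * All of the hw.em.* tunables above can be set at boot time from
 * loader.conf(5); the values below are only an illustration:
 *
 *      hw.em.rx_process_limit="500"
 *      hw.em.rxd="2048"
 *      hw.em.txd="2048"
 *
 * Being CTLFLAG_RDTUN, the matching sysctls report (but cannot change)
 * the values in effect at runtime.
 */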

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on
 *  the adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        if (resource_disabled("em", device_get_unit(dev))) {
                device_printf(dev, "Disabled by device hint\n");
                return (ENXIO);
        }

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors: it
         * must not exceed the hardware maximum, and the ring size in
         * bytes must be a multiple of EM_DBA_ALIGN.
         */
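        /*
         * Concretely: a legacy descriptor is 16 bytes, so with the
         * 128-byte EM_DBA_ALIGN from if_em.h the ring sizes must be
         * multiples of 8 descriptors (e.g. 1024 passes, 1020 is
         * rejected and falls back to the default below).
         */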
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state: this is
        ** important for the NVM and MAC
        ** address reads that follow.
        */
        e1000_reset_hw(hw);


        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again,
                ** and if it fails a second time it's a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the request
 *  rather than do an immediate send. It is this queueing, rather than
 *  also having multiple hardware tx queues, that is the advantage here.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}
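
/*
** A note on the locking scheme above: drbr is a thin wrapper over
** buf_ring(9), a lock-free multi-producer/single-consumer ring, which
** is why the TRYLOCK-miss path can enqueue safely without holding
** the TX lock; only the dequeue side needs serialization.
*/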

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
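                /*
                 * E.g. with the 9234-byte limit above, the largest
                 * accepted MTU works out to 9234 - ETHER_HDR_LEN (14)
                 * - ETHER_CRC_LEN (4) = 9216 bytes.
                 */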
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack
 *  as the init entry point in the network interface structure.
 *  It is also used by the driver as a hw/sw initialization
 *  routine to get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset, so we keep a duplicate
         * in RAR[14] for that eventuality; this ensures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for doing jumbos
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
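        /*
         * (MCLBYTES is 2k, MJUMPAGESIZE is one page (4k on most
         * platforms), and MJUM9BYTES is 9k; each step keeps a full
         * frame within a single mbuf cluster.)
         */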
1332
1333         /* Prepare receive descriptors and buffers */
1334         if (em_setup_receive_structures(adapter)) {
1335                 device_printf(dev, "Could not setup receive structures\n");
1336                 em_stop(adapter);
1337                 return;
1338         }
1339         em_initialize_receive_unit(adapter);
1340
1341         /* Use real VLAN Filter support? */
1342         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1343                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1344                         /* Use real VLAN Filter support */
1345                         em_setup_vlan_hw_support(adapter);
1346                 else {
1347                         u32 ctrl;
1348                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1349                         ctrl |= E1000_CTRL_VME;
1350                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1351                 }
1352         }
1353
1354         /* Don't lose promiscuous settings */
1355         em_set_promisc(adapter);
1356
1357         /* Set the interface as ACTIVE */
1358         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1359         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1360
1361         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1362         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1363
1364         /* MSI/X configuration for 82574 */
1365         if (adapter->hw.mac.type == e1000_82574) {
1366                 int tmp;
1367                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1368                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1369                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1370                 /* Set the IVAR - interrupt vector routing. */
1371                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
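                /* (adapter->ivars is assembled in em_allocate_msix()) */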
1372         }
1373
1374 #ifdef DEVICE_POLLING
1375         /*
1376          * Only enable interrupts if we are not polling; make sure
1377          * they are off otherwise.
1378          */
1379         if (ifp->if_capenable & IFCAP_POLLING)
1380                 em_disable_intr(adapter);
1381         else
1382 #endif /* DEVICE_POLLING */
1383                 em_enable_intr(adapter);
1384
1385         /* AMT based hardware can now take control from firmware */
1386         if (adapter->has_manage && adapter->has_amt)
1387                 em_get_hw_control(adapter);
1388 }
1389
1390 static void
1391 em_init(void *arg)
1392 {
1393         struct adapter *adapter = arg;
1394
1395         EM_CORE_LOCK(adapter);
1396         em_init_locked(adapter);
1397         EM_CORE_UNLOCK(adapter);
1398 }
1399
1400
1401 #ifdef DEVICE_POLLING
1402 /*********************************************************************
1403  *
1404  *  Legacy polling routine: note this only works with a single queue
1405  *
1406  *********************************************************************/
1407 static int
1408 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1409 {
1410         struct adapter *adapter = ifp->if_softc;
1411         struct tx_ring  *txr = adapter->tx_rings;
1412         struct rx_ring  *rxr = adapter->rx_rings;
1413         u32             reg_icr;
1414         int             rx_done;
1415
1416         EM_CORE_LOCK(adapter);
1417         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1418                 EM_CORE_UNLOCK(adapter);
1419                 return (0);
1420         }
1421
1422         if (cmd == POLL_AND_CHECK_STATUS) {
1423                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1424                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1425                         callout_stop(&adapter->timer);
1426                         adapter->hw.mac.get_link_status = 1;
1427                         em_update_link_status(adapter);
1428                         callout_reset(&adapter->timer, hz,
1429                             em_local_timer, adapter);
1430                 }
1431         }
1432         EM_CORE_UNLOCK(adapter);
1433
1434         em_rxeof(rxr, count, &rx_done);
1435
1436         EM_TX_LOCK(txr);
1437         em_txeof(txr);
1438 #ifdef EM_MULTIQUEUE
1439         if (!drbr_empty(ifp, txr->br))
1440                 em_mq_start_locked(ifp, txr, NULL);
1441 #else
1442         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1443                 em_start_locked(ifp, txr);
1444 #endif
1445         EM_TX_UNLOCK(txr);
1446
1447         return (rx_done);
1448 }
1449 #endif /* DEVICE_POLLING */
1450
1451
1452 /*********************************************************************
1453  *
1454  *  Fast Legacy/MSI Combined Interrupt Service routine  
1455  *
1456  *********************************************************************/
1457 static int
1458 em_irq_fast(void *arg)
1459 {
1460         struct adapter  *adapter = arg;
1461         struct ifnet    *ifp;
1462         u32             reg_icr;
1463
1464         ifp = adapter->ifp;
1465
1466         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1467
1468         /* Hot eject? A removed device reads back all 1s. */
1469         if (reg_icr == 0xffffffff)
1470                 return FILTER_STRAY;
1471
1472         /* Definitely not our interrupt.  */
1473         if (reg_icr == 0x0)
1474                 return FILTER_STRAY;
1475
1476         /*
1477          * Starting with the 82571 chip, bit 31 should be used to
1478          * determine whether the interrupt belongs to us.
1479          */
1480         if (adapter->hw.mac.type >= e1000_82571 &&
1481             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1482                 return FILTER_STRAY;
1483
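        /*
         * Mask further interrupts and defer the work to the taskqueue;
         * em_handle_que() re-enables interrupts when it is done.
         */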
1484         em_disable_intr(adapter);
1485         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1486
1487         /* Link status change */
1488         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1489                 adapter->hw.mac.get_link_status = 1;
1490                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1491         }
1492
1493         if (reg_icr & E1000_ICR_RXO)
1494                 adapter->rx_overruns++;
1495         return FILTER_HANDLED;
1496 }
1497
1498 /* Combined RX/TX handler, used by Legacy and MSI */
1499 static void
1500 em_handle_que(void *context, int pending)
1501 {
1502         struct adapter  *adapter = context;
1503         struct ifnet    *ifp = adapter->ifp;
1504         struct tx_ring  *txr = adapter->tx_rings;
1505         struct rx_ring  *rxr = adapter->rx_rings;
1506
1507
1508         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1509                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1510                 EM_TX_LOCK(txr);
1511                 em_txeof(txr);
1512 #ifdef EM_MULTIQUEUE
1513                 if (!drbr_empty(ifp, txr->br))
1514                         em_mq_start_locked(ifp, txr, NULL);
1515 #else
1516                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1517                         em_start_locked(ifp, txr);
1518 #endif
1519                 EM_TX_UNLOCK(txr);
1520                 if (more) {
1521                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1522                         return;
1523                 }
1524         }
1525
1526         em_enable_intr(adapter);
1527         return;
1528 }
1529
1530
1531 /*********************************************************************
1532  *
1533  *  MSIX Interrupt Service Routines
1534  *
1535  **********************************************************************/
1536 static void
1537 em_msix_tx(void *arg)
1538 {
1539         struct tx_ring *txr = arg;
1540         struct adapter *adapter = txr->adapter;
1541         struct ifnet    *ifp = adapter->ifp;
1542
1543         ++txr->tx_irq;
1544         EM_TX_LOCK(txr);
1545         em_txeof(txr);
1546 #ifdef EM_MULTIQUEUE
1547         if (!drbr_empty(ifp, txr->br))
1548                 em_mq_start_locked(ifp, txr, NULL);
1549 #else
1550         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1551                 em_start_locked(ifp, txr);
1552 #endif
1553         /* Reenable this interrupt */
1554         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1555         EM_TX_UNLOCK(txr);
1556         return;
1557 }
1558
1559 /*********************************************************************
1560  *
1561  *  MSIX RX Interrupt Service routine
1562  *
1563  **********************************************************************/
1564
1565 static void
1566 em_msix_rx(void *arg)
1567 {
1568         struct rx_ring  *rxr = arg;
1569         struct adapter  *adapter = rxr->adapter;
1570         bool            more;
1571
1572         ++rxr->rx_irq;
1573         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1574                 return;
1575         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1576         if (more)
1577                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1578         else
1579                 /* Reenable this interrupt */
1580                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1581         return;
1582 }
1583
1584 /*********************************************************************
1585  *
1586  *  MSIX Link Fast Interrupt Service routine
1587  *
1588  **********************************************************************/
1589 static void
1590 em_msix_link(void *arg)
1591 {
1592         struct adapter  *adapter = arg;
1593         u32             reg_icr;
1594
1595         ++adapter->link_irq;
1596         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1597
1598         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1599                 adapter->hw.mac.get_link_status = 1;
1600                 em_handle_link(adapter, 0);
1601         } else
1602                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1603                     EM_MSIX_LINK | E1000_IMS_LSC);
1604         return;
1605 }
1606
1607 static void
1608 em_handle_rx(void *context, int pending)
1609 {
1610         struct rx_ring  *rxr = context;
1611         struct adapter  *adapter = rxr->adapter;
1612         bool            more;
1613
1614         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1615         if (more)
1616                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1617         else
1618                 /* Reenable this interrupt */
1619                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1620 }
1621
1622 static void
1623 em_handle_tx(void *context, int pending)
1624 {
1625         struct tx_ring  *txr = context;
1626         struct adapter  *adapter = txr->adapter;
1627         struct ifnet    *ifp = adapter->ifp;
1628
1629         EM_TX_LOCK(txr);
1630         em_txeof(txr);
1631 #ifdef EM_MULTIQUEUE
1632         if (!drbr_empty(ifp, txr->br))
1633                 em_mq_start_locked(ifp, txr, NULL);
1634 #else
1635         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1636                 em_start_locked(ifp, txr);
1637 #endif
1638         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1639         EM_TX_UNLOCK(txr);
1640 }
1641
1642 static void
1643 em_handle_link(void *context, int pending)
1644 {
1645         struct adapter  *adapter = context;
1646         struct tx_ring  *txr = adapter->tx_rings;
1647         struct ifnet *ifp = adapter->ifp;
1648
1649         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1650                 return;
1651
1652         EM_CORE_LOCK(adapter);
1653         callout_stop(&adapter->timer);
1654         em_update_link_status(adapter);
1655         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1656         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1657             EM_MSIX_LINK | E1000_IMS_LSC);
1658         if (adapter->link_active) {
1659                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1660                         EM_TX_LOCK(txr);
1661 #ifdef EM_MULTIQUEUE
1662                         if (!drbr_empty(ifp, txr->br))
1663                                 em_mq_start_locked(ifp, txr, NULL);
1664 #else
1665                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1666                                 em_start_locked(ifp, txr);
1667 #endif
1668                         EM_TX_UNLOCK(txr);
1669                 }
1670         }
1671         EM_CORE_UNLOCK(adapter);
1672 }
1673
1674
1675 /*********************************************************************
1676  *
1677  *  Media Ioctl callback
1678  *
1679  *  This routine is called whenever the user queries the status of
1680  *  the interface using ifconfig.
1681  *
1682  **********************************************************************/
1683 static void
1684 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1685 {
1686         struct adapter *adapter = ifp->if_softc;
1687         u_char fiber_type = IFM_1000_SX;
1688
1689         INIT_DEBUGOUT("em_media_status: begin");
1690
1691         EM_CORE_LOCK(adapter);
1692         em_update_link_status(adapter);
1693
1694         ifmr->ifm_status = IFM_AVALID;
1695         ifmr->ifm_active = IFM_ETHER;
1696
1697         if (!adapter->link_active) {
1698                 EM_CORE_UNLOCK(adapter);
1699                 return;
1700         }
1701
1702         ifmr->ifm_status |= IFM_ACTIVE;
1703
1704         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1705             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1706                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1707         } else {
1708                 switch (adapter->link_speed) {
1709                 case 10:
1710                         ifmr->ifm_active |= IFM_10_T;
1711                         break;
1712                 case 100:
1713                         ifmr->ifm_active |= IFM_100_TX;
1714                         break;
1715                 case 1000:
1716                         ifmr->ifm_active |= IFM_1000_T;
1717                         break;
1718                 }
1719                 if (adapter->link_duplex == FULL_DUPLEX)
1720                         ifmr->ifm_active |= IFM_FDX;
1721                 else
1722                         ifmr->ifm_active |= IFM_HDX;
1723         }
1724         EM_CORE_UNLOCK(adapter);
1725 }
1726
1727 /*********************************************************************
1728  *
1729  *  Media Ioctl callback
1730  *
1731  *  This routine is called when the user changes speed/duplex using
1732  *  the media/mediaopt options of ifconfig.
1733  *
1734  **********************************************************************/
1735 static int
1736 em_media_change(struct ifnet *ifp)
1737 {
1738         struct adapter *adapter = ifp->if_softc;
1739         struct ifmedia  *ifm = &adapter->media;
1740
1741         INIT_DEBUGOUT("em_media_change: begin");
1742
1743         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1744                 return (EINVAL);
1745
1746         EM_CORE_LOCK(adapter);
1747         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1748         case IFM_AUTO:
1749                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1750                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1751                 break;
1752         case IFM_1000_LX:
1753         case IFM_1000_SX:
1754         case IFM_1000_T:
1755                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1756                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1757                 break;
1758         case IFM_100_TX:
1759                 adapter->hw.mac.autoneg = FALSE;
1760                 adapter->hw.phy.autoneg_advertised = 0;
1761                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1762                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1763                 else
1764                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1765                 break;
1766         case IFM_10_T:
1767                 adapter->hw.mac.autoneg = FALSE;
1768                 adapter->hw.phy.autoneg_advertised = 0;
1769                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1770                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1771                 else
1772                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1773                 break;
1774         default:
1775                 device_printf(adapter->dev, "Unsupported media type\n");
1776         }
1777
1778         em_init_locked(adapter);
1779         EM_CORE_UNLOCK(adapter);
1780
1781         return (0);
1782 }
1783
1784 /*********************************************************************
1785  *
1786  *  This routine maps the mbufs to tx descriptors.
1787  *
1788  *  return 0 on success, positive on failure
1789  **********************************************************************/
1790
1791 static int
1792 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1793 {
1794         struct adapter          *adapter = txr->adapter;
1795         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1796         bus_dmamap_t            map;
1797         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1798         struct e1000_tx_desc    *ctxd = NULL;
1799         struct mbuf             *m_head;
1800         struct ether_header     *eh;
1801         struct ip               *ip = NULL;
1802         struct tcphdr           *tp = NULL;
1803         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1804         int                     ip_off, poff;
1805         int                     nsegs, i, j, first, last = 0;
1806         int                     error, do_tso, tso_desc = 0, remap = 1;
1807
1808 retry:
1809         m_head = *m_headp;
1810         txd_upper = txd_lower = txd_used = txd_saved = 0;
1811         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1812         ip_off = poff = 0;
1813
1814         /*
1815          * Intel recommends that the entire IP/TCP header length reside in a
1816          * single buffer. If multiple descriptors are used to describe the IP
1817          * and TCP header, each descriptor should describe one or more
1818          * complete headers; descriptors referencing only parts of headers
1819          * are not supported. If all layer headers are not coalesced into
1820          * a single buffer, each buffer should not cross a 4KB boundary,
1821          * or be larger than the maximum read request size.
1822          * The controller also requires modifying the IP/TCP header to make
1823          * TSO work, so we first get a writable mbuf chain, then coalesce
1824          * the ethernet/IP/TCP headers into a single buffer to meet the
1825          * controller's requirements. This also simplifies IP/TCP/UDP
1826          * checksum offloading, which has similar restrictions.
1827          */
1828         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1829                 if (do_tso || (m_head->m_next != NULL && 
1830                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1831                         if (M_WRITABLE(*m_headp) == 0) {
1832                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1833                                 m_freem(*m_headp);
1834                                 if (m_head == NULL) {
1835                                         *m_headp = NULL;
1836                                         return (ENOBUFS);
1837                                 }
1838                                 *m_headp = m_head;
1839                         }
1840                 }
1841                 /*
1842                  * XXX
1843                  * Assume IPv4, we don't have TSO/checksum offload support
1844                  * for IPv6 yet.
1845                  */
1846                 ip_off = sizeof(struct ether_header);
1847                 m_head = m_pullup(m_head, ip_off);
1848                 if (m_head == NULL) {
1849                         *m_headp = NULL;
1850                         return (ENOBUFS);
1851                 }
1852                 eh = mtod(m_head, struct ether_header *);
1853                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1854                         ip_off = sizeof(struct ether_vlan_header);
1855                         m_head = m_pullup(m_head, ip_off);
1856                         if (m_head == NULL) {
1857                                 *m_headp = NULL;
1858                                 return (ENOBUFS);
1859                         }
1860                 }
1861                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1862                 if (m_head == NULL) {
1863                         *m_headp = NULL;
1864                         return (ENOBUFS);
1865                 }
1866                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1867                 poff = ip_off + (ip->ip_hl << 2);
1868                 if (do_tso) {
1869                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1870                         if (m_head == NULL) {
1871                                 *m_headp = NULL;
1872                                 return (ENOBUFS);
1873                         }
1874                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1875                         /*
1876                          * TSO workaround:
1877                          *   pull 4 more bytes of payload into the mbuf.
1878                          */
1879                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1880                         if (m_head == NULL) {
1881                                 *m_headp = NULL;
1882                                 return (ENOBUFS);
1883                         }
1884                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885                         ip->ip_len = 0;
1886                         ip->ip_sum = 0;
1887                         /*
1888                          * The pseudo TCP checksum does not include the TCP
1889                          * payload length, so the driver must recompute it
1890                          * here to match what the hardware expects to see,
1891                          * per Microsoft's Large Send specification.
1892                          */
1893                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1894                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1895                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
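                        /* (in_pseudo() deliberately omits the length field) */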
1896                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1897                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1898                         if (m_head == NULL) {
1899                                 *m_headp = NULL;
1900                                 return (ENOBUFS);
1901                         }
1902                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1903                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1904                         if (m_head == NULL) {
1905                                 *m_headp = NULL;
1906                                 return (ENOBUFS);
1907                         }
1908                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1909                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1910                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1911                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1912                         if (m_head == NULL) {
1913                                 *m_headp = NULL;
1914                                 return (ENOBUFS);
1915                         }
1916                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1917                 }
1918                 *m_headp = m_head;
1919         }
1920
1921         /*
1922          * Map the packet for DMA.
1923          *
1924          * Capture the first descriptor index;
1925          * this descriptor will hold the index
1926          * of the EOP, which is the only one that
1927          * now gets a DONE-bit writeback.
1928          */
1929         first = txr->next_avail_desc;
1930         tx_buffer = &txr->tx_buffers[first];
1931         tx_buffer_mapped = tx_buffer;
1932         map = tx_buffer->map;
1933
1934         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1935             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1936
1937         /*
1938          * There are two types of errors we can (try) to handle:
1939          * - EFBIG means the mbuf chain was too long and bus_dma ran
1940          *   out of segments.  Defragment the mbuf chain and try again.
1941          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1942          *   at this point in time.  Defer sending and try again later.
1943          * All other errors, in particular EINVAL, are fatal and prevent the
1944          * mbuf chain from ever going through.  Drop it and report error.
1945          */
1946         if (error == EFBIG && remap) {
1947                 struct mbuf *m;
1948
1949                 m = m_defrag(*m_headp, M_DONTWAIT);
1950                 if (m == NULL) {
1951                         adapter->mbuf_alloc_failed++;
1952                         m_freem(*m_headp);
1953                         *m_headp = NULL;
1954                         return (ENOBUFS);
1955                 }
1956                 *m_headp = m;
1957
1958                 /* Try it again, but only once */
1959                 remap = 0;
1960                 goto retry;
1961         } else if (error == ENOMEM) {
1962                 adapter->no_tx_dma_setup++;
1963                 return (error);
1964         } else if (error != 0) {
1965                 adapter->no_tx_dma_setup++;
1966                 m_freem(*m_headp);
1967                 *m_headp = NULL;
1968                 return (error);
1969         }
1970
1971         /*
1972          * TSO hardware workaround: if this packet is not
1973          * TSO, is only a single descriptor long, and
1974          * follows a TSO burst, then we need to add a
1975          * sentinel descriptor to prevent premature writeback.
1976          */
1977         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1978                 if (nsegs == 1)
1979                         tso_desc = TRUE;
1980                 txr->tx_tso = FALSE;
1981         }
1982
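        /*
         * Make sure enough descriptors are free; keep a small reserve
         * (two), since the TSO sentinel below can consume an extra slot.
         */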
1983         if (nsegs > (txr->tx_avail - 2)) {
1984                 txr->no_desc_avail++;
1985                 bus_dmamap_unload(txr->txtag, map);
1986                 return (ENOBUFS);
1987         }
1988         m_head = *m_headp;
1989
1990         /* Do hardware assists */
1991         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1992                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1993                     &txd_upper, &txd_lower);
1994                 /* we need to make a final sentinel transmit desc */
1995                 tso_desc = TRUE;
1996         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1997                 em_transmit_checksum_setup(txr, m_head,
1998                     ip_off, ip, &txd_upper, &txd_lower);
1999
2000         if (m_head->m_flags & M_VLANTAG) {
2001                 /* Set the vlan id. */
2002                 txd_upper |=
2003                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2004                 /* Tell hardware to add tag */
2005                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2006         }
2007
2008         i = txr->next_avail_desc;
2009
2010         /* Set up our transmit descriptors */
2011         for (j = 0; j < nsegs; j++) {
2012                 bus_size_t seg_len;
2013                 bus_addr_t seg_addr;
2014
2015                 tx_buffer = &txr->tx_buffers[i];
2016                 ctxd = &txr->tx_base[i];
2017                 seg_addr = segs[j].ds_addr;
2018                 seg_len  = segs[j].ds_len;
2019                 /*
2020                 ** TSO Workaround:
2021                 ** If this is the last descriptor, we want to
2022                 ** split it so we have a small final sentinel
2023                 */
2024                 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2025                         seg_len -= 4;
2026                         ctxd->buffer_addr = htole64(seg_addr);
2027                         ctxd->lower.data = htole32(
2028                             adapter->txd_cmd | txd_lower | seg_len);
2029                         ctxd->upper.data =
2030                             htole32(txd_upper);
2031                         if (++i == adapter->num_tx_desc)
2032                                 i = 0;
2033                         /* Now make the sentinel */     
2034                         ++txd_used; /* using an extra txd */
2035                         ctxd = &txr->tx_base[i];
2036                         tx_buffer = &txr->tx_buffers[i];
2037                         ctxd->buffer_addr =
2038                             htole64(seg_addr + seg_len);
2039                         ctxd->lower.data = htole32(
2040                             adapter->txd_cmd | txd_lower | 4);
2041                         ctxd->upper.data =
2042                             htole32(txd_upper);
2043                         last = i;
2044                         if (++i == adapter->num_tx_desc)
2045                                 i = 0;
2046                 } else {
2047                         ctxd->buffer_addr = htole64(seg_addr);
2048                         ctxd->lower.data = htole32(
2049                             adapter->txd_cmd | txd_lower | seg_len);
2050                         ctxd->upper.data =
2051                             htole32(txd_upper);
2052                         last = i;
2053                         if (++i == adapter->num_tx_desc)
2054                                 i = 0;
2055                 }
2056                 tx_buffer->m_head = NULL;
2057                 tx_buffer->next_eop = -1;
2058         }
2059
2060         txr->next_avail_desc = i;
2061         txr->tx_avail -= nsegs;
2062         if (tso_desc) /* TSO used an extra for sentinel */
2063                 txr->tx_avail -= txd_used;
2064
2065         tx_buffer->m_head = m_head;
2066         /*
2067         ** Here we swap the map so the last descriptor,
2068         ** which gets the completion interrupt, has the
2069         ** real map, and the first descriptor gets the
2070         ** unused map from this descriptor.
2071         */
2072         tx_buffer_mapped->map = tx_buffer->map;
2073         tx_buffer->map = map;
2074         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2075
2076         /*
2077          * Last Descriptor of Packet
2078          * needs End Of Packet (EOP)
2079          * and Report Status (RS)
2080          */
2081         ctxd->lower.data |=
2082             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2083         /*
2084          * Keep track in the first buffer which
2085          * descriptor will be written back
2086          */
2087         tx_buffer = &txr->tx_buffers[first];
2088         tx_buffer->next_eop = last;
2089         /* Update the watchdog time early and often */
2090         txr->watchdog_time = ticks;
2091
2092         /*
2093          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2094          * that this frame is available to transmit.
2095          */
2096         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2097             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2098         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2099
2100         return (0);
2101 }
2102
2103 static void
2104 em_set_promisc(struct adapter *adapter)
2105 {
2106         struct ifnet    *ifp = adapter->ifp;
2107         u32             reg_rctl;
2108
2109         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2110
2111         if (ifp->if_flags & IFF_PROMISC) {
2112                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2113                 /* Turn this on if you want to see bad packets */
2114                 if (em_debug_sbp)
2115                         reg_rctl |= E1000_RCTL_SBP;
2116                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2117         } else if (ifp->if_flags & IFF_ALLMULTI) {
2118                 reg_rctl |= E1000_RCTL_MPE;
2119                 reg_rctl &= ~E1000_RCTL_UPE;
2120                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2121         }
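        /*
         * NB: clearing these bits when neither flag is set is
         * handled separately, in em_disable_promisc().
         */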
2122 }
2123
2124 static void
2125 em_disable_promisc(struct adapter *adapter)
2126 {
2127         u32     reg_rctl;
2128
2129         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2130
2131         reg_rctl &= ~E1000_RCTL_UPE;
2132         reg_rctl &= ~E1000_RCTL_MPE;
2133         reg_rctl &= ~E1000_RCTL_SBP;
2134         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135 }
2136
2137
2138 /*********************************************************************
2139  *  Multicast Update
2140  *
2141  *  This routine is called whenever multicast address list is updated.
2142  *
2143  **********************************************************************/
2144
2145 static void
2146 em_set_multi(struct adapter *adapter)
2147 {
2148         struct ifnet    *ifp = adapter->ifp;
2149         struct ifmultiaddr *ifma;
2150         u32 reg_rctl = 0;
2151         u8  *mta; /* Multicast array memory */
2152         int mcnt = 0;
2153
2154         IOCTL_DEBUGOUT("em_set_multi: begin");
2155
2156         mta = adapter->mta;
2157         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2158
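        /*
         * Workaround for 82542 rev 2.0: disable MWI and hold the
         * receiver in reset while the multicast table is updated.
         */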
2159         if (adapter->hw.mac.type == e1000_82542 && 
2160             adapter->hw.revision_id == E1000_REVISION_2) {
2161                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2162                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2163                         e1000_pci_clear_mwi(&adapter->hw);
2164                 reg_rctl |= E1000_RCTL_RST;
2165                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2166                 msec_delay(5);
2167         }
2168
2169 #if __FreeBSD_version < 800000
2170         IF_ADDR_LOCK(ifp);
2171 #else
2172         if_maddr_rlock(ifp);
2173 #endif
2174         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2175                 if (ifma->ifma_addr->sa_family != AF_LINK)
2176                         continue;
2177
2178                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2179                         break;
2180
2181                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2182                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2183                 mcnt++;
2184         }
2185 #if __FreeBSD_version < 800000
2186         IF_ADDR_UNLOCK(ifp);
2187 #else
2188         if_maddr_runlock(ifp);
2189 #endif
2190         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2191                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2192                 reg_rctl |= E1000_RCTL_MPE;
2193                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2194         } else
2195                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2196
2197         if (adapter->hw.mac.type == e1000_82542 && 
2198             adapter->hw.revision_id == E1000_REVISION_2) {
2199                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2200                 reg_rctl &= ~E1000_RCTL_RST;
2201                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2202                 msec_delay(5);
2203                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2204                         e1000_pci_set_mwi(&adapter->hw);
2205         }
2206 }
2207
2208
2209 /*********************************************************************
2210  *  Timer routine
2211  *
2212  *  This routine checks for link status and updates statistics.
2213  *
2214  **********************************************************************/
2215
2216 static void
2217 em_local_timer(void *arg)
2218 {
2219         struct adapter  *adapter = arg;
2220         struct ifnet    *ifp = adapter->ifp;
2221         struct tx_ring  *txr = adapter->tx_rings;
2222         struct rx_ring  *rxr = adapter->rx_rings;
2223         u32             trigger;
2224
2225         EM_CORE_LOCK_ASSERT(adapter);
2226
2227         em_update_link_status(adapter);
2228         em_update_stats_counters(adapter);
2229
2230         /* Reset LAA into RAR[0] on 82571 */
2231         if ((adapter->hw.mac.type == e1000_82571) &&
2232             e1000_get_laa_state_82571(&adapter->hw))
2233                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2234
2235         /* Mask to use in the irq trigger */
2236         if (adapter->msix_mem)
2237                 trigger = rxr->ims; /* RX for 82574 */
2238         else
2239                 trigger = E1000_ICS_RXDMT0;
2240
2241         /*
2242         ** Check on the state of the TX queue(s); this
2243         ** can be done without the lock because it's RO,
2244         ** and the HUNG state will be static if set.
2245         */
2246         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2247                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2248                     (adapter->pause_frames == 0))
2249                         goto hung;
2250                 /* Schedule a TX tasklet if needed */
2251                 if (txr->tx_avail <= EM_MAX_SCATTER)
2252                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2253         }
2254         
2255         adapter->pause_frames = 0;
2256         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2257 #ifndef DEVICE_POLLING
2258         /* Trigger an RX interrupt to guarantee mbuf refresh */
2259         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2260 #endif
2261         return;
2262 hung:
2263         /* Looks like we're hung */
2264         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2265         device_printf(adapter->dev,
2266             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2267             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2268             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2269         device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2270             "Next TX to Clean = %d\n",
2271             txr->me, txr->tx_avail, txr->next_to_clean);
2272         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2273         adapter->watchdog_events++;
2274         adapter->pause_frames = 0;
2275         em_init_locked(adapter);
2276 }
2277
2278
2279 static void
2280 em_update_link_status(struct adapter *adapter)
2281 {
2282         struct e1000_hw *hw = &adapter->hw;
2283         struct ifnet *ifp = adapter->ifp;
2284         device_t dev = adapter->dev;
2285         struct tx_ring *txr = adapter->tx_rings;
2286         u32 link_check = 0;
2287
2288         /* Get the cached link value or read phy for real */
2289         switch (hw->phy.media_type) {
2290         case e1000_media_type_copper:
2291                 if (hw->mac.get_link_status) {
2292                         /* Do the work to read phy */
2293                         e1000_check_for_link(hw);
2294                         link_check = !hw->mac.get_link_status;
2295                         if (link_check) /* ESB2 fix */
2296                                 e1000_cfg_on_link_up(hw);
2297                 } else
2298                         link_check = TRUE;
2299                 break;
2300         case e1000_media_type_fiber:
2301                 e1000_check_for_link(hw);
2302                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2303                                  E1000_STATUS_LU);
2304                 break;
2305         case e1000_media_type_internal_serdes:
2306                 e1000_check_for_link(hw);
2307                 link_check = adapter->hw.mac.serdes_has_link;
2308                 break;
2309         default:
2310         case e1000_media_type_unknown:
2311                 break;
2312         }
2313
2314         /* Now check for a transition */
2315         if (link_check && (adapter->link_active == 0)) {
2316                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2317                     &adapter->link_duplex);
2318                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2319                 if ((adapter->link_speed != SPEED_1000) &&
2320                     ((hw->mac.type == e1000_82571) ||
2321                     (hw->mac.type == e1000_82572))) {
2322                         int tarc0;
2323                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2324                         tarc0 &= ~SPEED_MODE_BIT;
2325                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2326                 }
2327                 if (bootverbose)
2328                         device_printf(dev, "Link is up %d Mbps %s\n",
2329                             adapter->link_speed,
2330                             ((adapter->link_duplex == FULL_DUPLEX) ?
2331                             "Full Duplex" : "Half Duplex"));
2332                 adapter->link_active = 1;
2333                 adapter->smartspeed = 0;
2334                 ifp->if_baudrate = adapter->link_speed * 1000000;
2335                 if_link_state_change(ifp, LINK_STATE_UP);
2336         } else if (!link_check && (adapter->link_active == 1)) {
2337                 ifp->if_baudrate = adapter->link_speed = 0;
2338                 adapter->link_duplex = 0;
2339                 if (bootverbose)
2340                         device_printf(dev, "Link is Down\n");
2341                 adapter->link_active = 0;
2342                 /* Link down, disable watchdog */
2343                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2344                         txr->queue_status = EM_QUEUE_IDLE;
2345                 if_link_state_change(ifp, LINK_STATE_DOWN);
2346         }
2347 }
2348
2349 /*********************************************************************
2350  *
2351  *  This routine disables all traffic on the adapter by issuing a
2352  *  global reset on the MAC and deallocates TX/RX buffers.
2353  *
2354  *  This routine should always be called with BOTH the CORE
2355  *  and TX locks.
2356  **********************************************************************/
2357
2358 static void
2359 em_stop(void *arg)
2360 {
2361         struct adapter  *adapter = arg;
2362         struct ifnet    *ifp = adapter->ifp;
2363         struct tx_ring  *txr = adapter->tx_rings;
2364
2365         EM_CORE_LOCK_ASSERT(adapter);
2366
2367         INIT_DEBUGOUT("em_stop: begin");
2368
2369         em_disable_intr(adapter);
2370         callout_stop(&adapter->timer);
2371
2372         /* Tell the stack that the interface is no longer active */
2373         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2374         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2375
2376         /* Unarm watchdog timer. */
2377         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2378                 EM_TX_LOCK(txr);
2379                 txr->queue_status = EM_QUEUE_IDLE;
2380                 EM_TX_UNLOCK(txr);
2381         }
2382
2383         e1000_reset_hw(&adapter->hw);
2384         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2385
2386         e1000_led_off(&adapter->hw);
2387         e1000_cleanup_led(&adapter->hw);
2388 }
2389
2390
2391 /*********************************************************************
2392  *
2393  *  Determine hardware revision.
2394  *
2395  **********************************************************************/
2396 static void
2397 em_identify_hardware(struct adapter *adapter)
2398 {
2399         device_t dev = adapter->dev;
2400
2401         /* Make sure our PCI config space has the necessary stuff set */
2402         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2403         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2404             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2405                 device_printf(dev, "Memory Access and/or Bus Master bits "
2406                     "were not set!\n");
2407                 adapter->hw.bus.pci_cmd_word |=
2408                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2409                 pci_write_config(dev, PCIR_COMMAND,
2410                     adapter->hw.bus.pci_cmd_word, 2);
2411         }
2412
2413         /* Save off the information about this board */
2414         adapter->hw.vendor_id = pci_get_vendor(dev);
2415         adapter->hw.device_id = pci_get_device(dev);
2416         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2417         adapter->hw.subsystem_vendor_id =
2418             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2419         adapter->hw.subsystem_device_id =
2420             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2421
2422         /* Do Shared Code Init and Setup */
2423         if (e1000_set_mac_type(&adapter->hw)) {
2424                 device_printf(dev, "Setup init failure\n");
2425                 return;
2426         }
2427 }
2428
2429 static int
2430 em_allocate_pci_resources(struct adapter *adapter)
2431 {
2432         device_t        dev = adapter->dev;
2433         int             rid;
2434
2435         rid = PCIR_BAR(0);
2436         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2437             &rid, RF_ACTIVE);
2438         if (adapter->memory == NULL) {
2439                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2440                 return (ENXIO);
2441         }
2442         adapter->osdep.mem_bus_space_tag =
2443             rman_get_bustag(adapter->memory);
2444         adapter->osdep.mem_bus_space_handle =
2445             rman_get_bushandle(adapter->memory);
2446         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2447
2448         /* Default to a single queue */
2449         adapter->num_queues = 1;
2450
2451         /*
2452          * Setup MSI/X or MSI if PCI Express
2453          */
2454         adapter->msix = em_setup_msix(adapter);
2455
2456         adapter->hw.back = &adapter->osdep;
2457
2458         return (0);
2459 }
2460
2461 /*********************************************************************
2462  *
2463  *  Setup the Legacy or MSI Interrupt handler
2464  *
2465  **********************************************************************/
2466 int
2467 em_allocate_legacy(struct adapter *adapter)
2468 {
2469         device_t dev = adapter->dev;
2470         struct tx_ring  *txr = adapter->tx_rings;
2471         int error, rid = 0;
2472
2473         /* Manually turn off all interrupts */
2474         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2475
2476         if (adapter->msix == 1) /* using MSI */
2477                 rid = 1;
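        /* (legacy INTx uses rid 0; an MSI message maps to rid 1) */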
2478         /* We allocate a single interrupt resource */
2479         adapter->res = bus_alloc_resource_any(dev,
2480             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2481         if (adapter->res == NULL) {
2482                 device_printf(dev, "Unable to allocate bus resource: "
2483                     "interrupt\n");
2484                 return (ENXIO);
2485         }
2486
2487         /*
2488          * Allocate a fast interrupt and the associated
2489          * deferred processing contexts.
2490          */
2491         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2492         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2493             taskqueue_thread_enqueue, &adapter->tq);
2494         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2495             device_get_nameunit(adapter->dev));
2496         /* Use a TX-only tasklet; the local timer also enqueues it */
2497         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2498         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2499             taskqueue_thread_enqueue, &txr->tq);
2500         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2501             device_get_nameunit(adapter->dev));
2502         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2503         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2504             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2505                 device_printf(dev, "Failed to register fast interrupt "
2506                             "handler: %d\n", error);
2507                 taskqueue_free(adapter->tq);
2508                 adapter->tq = NULL;
2509                 return (error);
2510         }
2511         
2512         return (0);
2513 }
2514
2515 /*********************************************************************
2516  *
2517  *  Setup the MSIX Interrupt handlers
2518  *   This is not really multiqueue; rather,
2519  *   it's just separate interrupt vectors
2520  *   for TX, RX, and Link.
2521  *
2522  **********************************************************************/
2523 int
2524 em_allocate_msix(struct adapter *adapter)
2525 {
2526         device_t        dev = adapter->dev;
2527         struct          tx_ring *txr = adapter->tx_rings;
2528         struct          rx_ring *rxr = adapter->rx_rings;
2529         int             error, rid, vector = 0;
2530
2531
2532         /* Make sure all interrupts are disabled */
2533         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2534
2535         /* First set up ring resources */
2536         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2537
2538                 /* RX ring */
2539                 rid = vector + 1;
2540
2541                 rxr->res = bus_alloc_resource_any(dev,
2542                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2543                 if (rxr->res == NULL) {
2544                         device_printf(dev,
2545                             "Unable to allocate bus resource: "
2546                             "RX MSIX Interrupt %d\n", i);
2547                         return (ENXIO);
2548                 }
2549                 if ((error = bus_setup_intr(dev, rxr->res,
2550                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2551                     rxr, &rxr->tag)) != 0) {
2552                         device_printf(dev, "Failed to register RX handler");
2553                         return (error);
2554                 }
2555 #if __FreeBSD_version >= 800504
2556                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2557 #endif
2558                 rxr->msix = vector++; /* NOTE increment vector for TX */
2559                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2560                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2561                     taskqueue_thread_enqueue, &rxr->tq);
2562                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2563                     device_get_nameunit(adapter->dev));
2564                 /*
2565                 ** Set the bit to enable interrupt
2566                 ** in E1000_IMS -- bits 20 and 21
2567                 ** are for RX0 and RX1, note this has
2568                 ** NOTHING to do with the MSIX vector
2569                 */
2570                 rxr->ims = 1 << (20 + i);
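                /*
                ** IVAR entries are 4 bits each: the low 3 bits hold
                ** the MSIX vector and 0x8 marks the entry as valid.
                */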
2571                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2572
2573                 /* TX ring */
2574                 rid = vector + 1;
2575                 txr->res = bus_alloc_resource_any(dev,
2576                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2577                 if (txr->res == NULL) {
2578                         device_printf(dev,
2579                             "Unable to allocate bus resource: "
2580                             "TX MSIX Interrupt %d\n", i);
2581                         return (ENXIO);
2582                 }
2583                 if ((error = bus_setup_intr(dev, txr->res,
2584                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2585                     txr, &txr->tag)) != 0) {
2586                         device_printf(dev, "Failed to register TX handler");
2587                         return (error);
2588                 }
2589 #if __FreeBSD_version >= 800504
2590                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2591 #endif
2592                 txr->msix = vector++; /* Increment vector for next pass */
2593                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2594                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2595                     taskqueue_thread_enqueue, &txr->tq);
2596                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2597                     device_get_nameunit(adapter->dev));
2598                 /*
2599                 ** Set the bit to enable interrupt
2600                 ** in E1000_IMS -- bits 22 and 23
2601                 ** are for TX0 and TX1, note this has
2602                 ** NOTHING to do with the MSIX vector
2603                 */
2604                 txr->ims = 1 << (22 + i);
2605                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2606         }
2607
2608         /* Link interrupt */
2609         ++rid;
2610         adapter->res = bus_alloc_resource_any(dev,
2611             SYS_RES_IRQ, &rid, RF_ACTIVE);
2612         if (!adapter->res) {
2613                 device_printf(dev,"Unable to allocate "
2614                     "bus resource: Link interrupt [%d]\n", rid);
2615                 return (ENXIO);
2616         }
2617         /* Set the link handler function */
2618         error = bus_setup_intr(dev, adapter->res,
2619             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2620             em_msix_link, adapter, &adapter->tag);
2621         if (error) {
2622                 adapter->res = NULL;
2623                 device_printf(dev, "Failed to register LINK handler");
2624                 return (error);
2625         }
2626 #if __FreeBSD_version >= 800504
2627         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2628 #endif
2629         adapter->linkvec = vector;
2630         adapter->ivars |=  (8 | vector) << 16;
2631         adapter->ivars |= 0x80000000;
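        /* (bit 31 asks the 82574 for an interrupt on every TX writeback) */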
2632
2633         return (0);
2634 }
2635
2636
2637 static void
2638 em_free_pci_resources(struct adapter *adapter)
2639 {
2640         device_t        dev = adapter->dev;
2641         struct tx_ring  *txr;
2642         struct rx_ring  *rxr;
2643         int             rid;
2644
2645
2646         /*
2647         ** Release all the queue interrupt resources:
2648         */
2649         for (int i = 0; i < adapter->num_queues; i++) {
2650                 txr = &adapter->tx_rings[i];
2651                 rxr = &adapter->rx_rings[i];
2652                 /* an early abort? */
2653                 if ((txr == NULL) || (rxr == NULL))
2654                         break;
2655                 rid = txr->msix + 1;
2656                 if (txr->tag != NULL) {
2657                         bus_teardown_intr(dev, txr->res, txr->tag);
2658                         txr->tag = NULL;
2659                 }
2660                 if (txr->res != NULL)
2661                         bus_release_resource(dev, SYS_RES_IRQ,
2662                             rid, txr->res);
2663                 rid = rxr->msix + 1;
2664                 if (rxr->tag != NULL) {
2665                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2666                         rxr->tag = NULL;
2667                 }
2668                 if (rxr->res != NULL)
2669                         bus_release_resource(dev, SYS_RES_IRQ,
2670                             rid, rxr->res);
2671         }
2672
2673         if (adapter->linkvec) /* we are doing MSIX */
2674                 rid = adapter->linkvec + 1;
2675         else
2676                 rid = (adapter->msix != 0) ? 1 : 0;
2677
2678         if (adapter->tag != NULL) {
2679                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2680                 adapter->tag = NULL;
2681         }
2682
2683         if (adapter->res != NULL)
2684                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2685
2686
2687         if (adapter->msix)
2688                 pci_release_msi(dev);
2689
2690         if (adapter->msix_mem != NULL)
2691                 bus_release_resource(dev, SYS_RES_MEMORY,
2692                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2693
2694         if (adapter->memory != NULL)
2695                 bus_release_resource(dev, SYS_RES_MEMORY,
2696                     PCIR_BAR(0), adapter->memory);
2697
2698         if (adapter->flash != NULL)
2699                 bus_release_resource(dev, SYS_RES_MEMORY,
2700                     EM_FLASH, adapter->flash);
2701 }
2702
2703 /*
2704  * Setup MSI or MSI/X
2705  */
2706 static int
2707 em_setup_msix(struct adapter *adapter)
2708 {
2709         device_t dev = adapter->dev;
2710         int val = 0;
2711
2712         /*
2713         ** Setup MSI/X for Hartwell: tests have shown
2714         ** use of two queues to be unstable, and to
2715         ** provide no great gain anyway, so we simply
2716         ** separate the interrupts and use a single queue.
2717         */
2718         if ((adapter->hw.mac.type == e1000_82574) &&
2719             (em_enable_msix == TRUE)) {
2720                 /* Map the MSIX BAR */
2721                 int rid = PCIR_BAR(EM_MSIX_BAR);
2722                 adapter->msix_mem = bus_alloc_resource_any(dev,
2723                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2724                 if (!adapter->msix_mem) {
2725                         /* May not be enabled */
2726                         device_printf(adapter->dev,
2727                             "Unable to map MSIX table\n");
2728                         goto msi;
2729                 }
2730                 val = pci_msix_count(dev);
2731                 /* We only need 3 vectors */
2732                 if (val > 3)
2733                         val = 3;
2734                 if (val != 3) { /* capped above, so != 3 means too few */
2735                         bus_release_resource(dev, SYS_RES_MEMORY,
2736                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2737                         adapter->msix_mem = NULL;
2738                         device_printf(adapter->dev,
2739                             "MSIX: incorrect vectors, using MSI\n");
2740                         goto msi;
2741                 }
2742
2743                 if (pci_alloc_msix(dev, &val) == 0) {
2744                         device_printf(adapter->dev,
2745                             "Using MSIX interrupts "
2746                             "with %d vectors\n", val);
2747                         return (val);
2747                 }
2748
2748                 /* The allocation itself failed, fall back to MSI */
2748                 bus_release_resource(dev, SYS_RES_MEMORY,
2748                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2749                 adapter->msix_mem = NULL;
2750         }
2751 msi:
2752         val = pci_msi_count(dev);
2753         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2754                 adapter->msix = 1;
2755                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2756                 return (val);
2757         } 
2758         /* Should only happen due to manual configuration */
2759         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2760         return (0);
2761 }
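/*
 * Illustrative note (an editorial sketch, not code from this file): with
 * the three vectors requested above on an 82574, the actual per-vector
 * assignment is made in em_allocate_msix() elsewhere in this driver. The
 * teardown in em_free_pci_resources() assumes the same mapping, computing
 * each SYS_RES_IRQ rid as vector + 1 (txr->msix + 1, rxr->msix + 1, and
 * linkvec + 1 respectively).
 */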
2762
2763
2764 /*********************************************************************
2765  *
2766  *  Initialize the hardware to a configuration
2767  *  as specified by the adapter structure.
2768  *
2769  **********************************************************************/
2770 static void
2771 em_reset(struct adapter *adapter)
2772 {
2773         device_t        dev = adapter->dev;
2774         struct ifnet    *ifp = adapter->ifp;
2775         struct e1000_hw *hw = &adapter->hw;
2776         u16             rx_buffer_size;
2777         u32             pba;
2778
2779         INIT_DEBUGOUT("em_reset: begin");
2780
2781         /* Set up smart power down as default off on newer adapters. */
2782         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2783             hw->mac.type == e1000_82572)) {
2784                 u16 phy_tmp = 0;
2785
2786                 /* Speed up time to link by disabling smart power down. */
2787                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2788                 phy_tmp &= ~IGP02E1000_PM_SPD;
2789                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2790         }
2791
2792         /*
2793          * Packet Buffer Allocation (PBA)
2794          * Writing PBA sets the receive portion of the buffer;
2795          * the remainder is used for the transmit buffer.
2796          */
2797         switch (hw->mac.type) {
2798         /* Total Packet Buffer on these is 48K */
2799         case e1000_82571:
2800         case e1000_82572:
2801         case e1000_80003es2lan:
2802                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2803                 break;
2804         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2805                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2806                 break;
2807         case e1000_82574:
2808         case e1000_82583:
2809                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2810                 break;
2811         case e1000_ich8lan:
2812                 pba = E1000_PBA_8K;
2813                 break;
2814         case e1000_ich9lan:
2815         case e1000_ich10lan:
2816                 /* Boost Receive side for jumbo frames */
2817                 if (adapter->max_frame_size > 4096)
2818                         pba = E1000_PBA_14K;
2819                 else
2820                         pba = E1000_PBA_10K;
2821                 break;
2822         case e1000_pchlan:
2823         case e1000_pch2lan:
2824                 pba = E1000_PBA_26K;
2825                 break;
2826         default:
2827                 if (adapter->max_frame_size > 8192)
2828                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2829                 else
2830                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2831         }
2832         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
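        /*
         * e.g. on an adapter with a 48K packet buffer, writing
         * E1000_PBA_32K above leaves 32K for the receive side and the
         * remaining 16K for transmit, per the per-MAC cases above.
         */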
2833
2834         /*
2835          * These parameters control the automatic generation (Tx) and
2836          * response (Rx) to Ethernet PAUSE frames.
2837          * - High water mark should allow for at least two frames to be
2838          *   received after sending an XOFF.
2839          * - Low water mark works best when it is very near the high water mark.
2840          *   This allows the receiver to restart by sending XON when it has
2841          *   drained a bit. Here we use an arbitrary value of 1500 which will
2842          *   restart after one full frame is pulled from the buffer. There
2843          *   could be several smaller frames in the buffer and if so they will
2844          *   not trigger the XON until their total size reduces the buffer
2845          *   by 1500.
2846          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2847          */
2848         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2849         hw->fc.high_water = rx_buffer_size -
2850             roundup2(adapter->max_frame_size, 1024);
2851         hw->fc.low_water = hw->fc.high_water - 1500;
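        /*
         * Worked example (values assumed for illustration): with a 48K
         * receive allocation, rx_buffer_size = 48 << 10 = 49152 bytes.
         * For a standard 1518-byte max frame, roundup2(1518, 1024) = 2048,
         * giving high_water = 49152 - 2048 = 47104 and
         * low_water = 47104 - 1500 = 45604.
         */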
2852
2853         if (adapter->fc) /* locally set flow control value? */
2854                 hw->fc.requested_mode = adapter->fc;
2855         else
2856                 hw->fc.requested_mode = e1000_fc_full;
2857
2858         if (hw->mac.type == e1000_80003es2lan)
2859                 hw->fc.pause_time = 0xFFFF;
2860         else
2861                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2862
2863         hw->fc.send_xon = TRUE;
2864
2865         /* Device specific overrides/settings */
2866         switch (hw->mac.type) {
2867         case e1000_pchlan:
2868                 /* Workaround: no TX flow ctrl for PCH */
2869                 hw->fc.requested_mode = e1000_fc_rx_pause;
2870                 hw->fc.pause_time = 0xFFFF; /* override */
2871                 if (ifp->if_mtu > ETHERMTU) {
2872                         hw->fc.high_water = 0x3500;
2873                         hw->fc.low_water = 0x1500;
2874                 } else {
2875                         hw->fc.high_water = 0x5000;
2876                         hw->fc.low_water = 0x3000;
2877                 }
2878                 hw->fc.refresh_time = 0x1000;
2879                 break;
2880         case e1000_pch2lan:
2881                 hw->fc.high_water = 0x5C20;
2882                 hw->fc.low_water = 0x5048;
2883                 hw->fc.pause_time = 0x0650;
2884                 hw->fc.refresh_time = 0x0400;
2885                 /* Jumbos need adjusted PBA */
2886                 if (ifp->if_mtu > ETHERMTU)
2887                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2888                 else
2889                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2890                 break;
2891         case e1000_ich9lan:
2892         case e1000_ich10lan:
2893                 if (ifp->if_mtu > ETHERMTU) {
2894                         hw->fc.high_water = 0x2800;
2895                         hw->fc.low_water = hw->fc.high_water - 8;
2896                         break;
2897                 } 
2898                 /* else fall thru */
2899         default:
2900                 if (hw->mac.type == e1000_80003es2lan)
2901                         hw->fc.pause_time = 0xFFFF;
2902                 break;
2903         }
2904
2905         /* Issue a global reset */
2906         e1000_reset_hw(hw);
2907         E1000_WRITE_REG(hw, E1000_WUC, 0);
2908         em_disable_aspm(adapter);
2909         /* and a re-init */
2910         if (e1000_init_hw(hw) < 0) {
2911                 device_printf(dev, "Hardware Initialization Failed\n");
2912                 return;
2913         }
2914
2915         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2916         e1000_get_phy_info(hw);
2917         e1000_check_for_link(hw);
2918         return;
2919 }
2920
2921 /*********************************************************************
2922  *
2923  *  Setup networking device structure and register an interface.
2924  *
2925  **********************************************************************/
2926 static int
2927 em_setup_interface(device_t dev, struct adapter *adapter)
2928 {
2929         struct ifnet   *ifp;
2930
2931         INIT_DEBUGOUT("em_setup_interface: begin");
2932
2933         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2934         if (ifp == NULL) {
2935                 device_printf(dev, "cannot allocate ifnet structure\n");
2936                 return (-1);
2937         }
2938         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2939         ifp->if_init = em_init;
2940         ifp->if_softc = adapter;
2941         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2942         ifp->if_ioctl = em_ioctl;
2943 #ifdef EM_MULTIQUEUE
2944         /* Multiqueue stack interface */
2945         ifp->if_transmit = em_mq_start;
2946         ifp->if_qflush = em_qflush;
2947 #else
2948         ifp->if_start = em_start;
2949         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2950         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2951         IFQ_SET_READY(&ifp->if_snd);
2952 #endif  
2953
2954         ether_ifattach(ifp, adapter->hw.mac.addr);
2955
2956         ifp->if_capabilities = ifp->if_capenable = 0;
2957
2958
2959         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2960         ifp->if_capabilities |= IFCAP_TSO4;
2961         /*
2962          * Tell the upper layer(s) we
2963          * support full VLAN capability
2964          */
2965         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2966         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2967                              |  IFCAP_VLAN_HWTSO
2968                              |  IFCAP_VLAN_MTU;
2969         ifp->if_capenable = ifp->if_capabilities;
2970
2971         /*
2972         ** Don't turn this on by default: if vlans are
2973         ** created on another pseudo device (e.g. lagg)
2974         ** then vlan events are not passed through, breaking
2975         ** operation, but with HW FILTER off it works. If
2976         ** using vlans directly on the em driver you can
2977         ** enable this and get full hardware tag filtering.
2978         */
2979         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2980
2981 #ifdef DEVICE_POLLING
2982         ifp->if_capabilities |= IFCAP_POLLING;
2983 #endif
2984
2985         /* Enable only WOL MAGIC by default */
2986         if (adapter->wol) {
2987                 ifp->if_capabilities |= IFCAP_WOL;
2988                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2989         }
2990                 
2991         /*
2992          * Specify the media types supported by this adapter and register
2993          * callbacks to update media and link information
2994          */
2995         ifmedia_init(&adapter->media, IFM_IMASK,
2996             em_media_change, em_media_status);
2997         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2998             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2999                 u_char fiber_type = IFM_1000_SX;        /* default type */
3000
3001                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
3002                             0, NULL);
3003                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3004         } else {
3005                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3006                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3007                             0, NULL);
3008                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3009                             0, NULL);
3010                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3011                             0, NULL);
3012                 if (adapter->hw.phy.type != e1000_phy_ife) {
3013                         ifmedia_add(&adapter->media,
3014                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3015                         ifmedia_add(&adapter->media,
3016                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3017                 }
3018         }
3019         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3020         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3021         return (0);
3022 }
3023
3024
3025 /*
3026  * Manage DMA'able memory.
3027  */
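/*
 * em_dmamap_cb is the callback handed to bus_dmamap_load() below; the
 * DMA tag is created with nsegments = 1, so on success exactly one
 * segment is reported and its physical address is stored through the
 * opaque arg pointer (&dma->dma_paddr in em_dma_malloc()).
 */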
3028 static void
3029 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3030 {
3031         if (error)
3032                 return;
3033         *(bus_addr_t *) arg = segs[0].ds_addr;
3034 }
3035
3036 static int
3037 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3038         struct em_dma_alloc *dma, int mapflags)
3039 {
3040         int error;
3041
3042         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3043                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3044                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3045                                 BUS_SPACE_MAXADDR,      /* highaddr */
3046                                 NULL, NULL,             /* filter, filterarg */
3047                                 size,                   /* maxsize */
3048                                 1,                      /* nsegments */
3049                                 size,                   /* maxsegsize */
3050                                 0,                      /* flags */
3051                                 NULL,                   /* lockfunc */
3052                                 NULL,                   /* lockarg */
3053                                 &dma->dma_tag);
3054         if (error) {
3055                 device_printf(adapter->dev,
3056                     "%s: bus_dma_tag_create failed: %d\n",
3057                     __func__, error);
3058                 goto fail_0;
3059         }
3060
3061         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3062             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3063         if (error) {
3064                 device_printf(adapter->dev,
3065                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3066                     __func__, (uintmax_t)size, error);
3067                 /* nothing was allocated yet; just destroy the tag */
3067                 bus_dma_tag_destroy(dma->dma_tag);
3067                 goto fail_0;
3068         }
3069
3070         dma->dma_paddr = 0;
3071         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3072             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3073         if (error || dma->dma_paddr == 0) {
3074                 device_printf(adapter->dev,
3075                     "%s: bus_dmamap_load failed: %d\n",
3076                     __func__, error);
3077                 goto fail_3;
3078         }
3079
3080         return (0);
3081
3082 fail_3:
3083         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3084 fail_2:
3085         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3086         bus_dma_tag_destroy(dma->dma_tag);
3087 fail_0:
3088         dma->dma_map = NULL;
3089         dma->dma_tag = NULL;
3090
3091         return (error);
3092 }
3093
3094 static void
3095 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3096 {
3097         if (dma->dma_tag == NULL)
3098                 return;
3099         if (dma->dma_map != NULL) {
3100                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3101                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3102                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3103                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3104                 dma->dma_map = NULL;
3105         }
3106         bus_dma_tag_destroy(dma->dma_tag);
3107         dma->dma_tag = NULL;
3108 }
3109
3110
3111 /*********************************************************************
3112  *
3113  *  Allocate memory for the transmit and receive rings, and then
3114  *  the descriptors associated with each, called only once at attach.
3115  *
3116  **********************************************************************/
3117 static int
3118 em_allocate_queues(struct adapter *adapter)
3119 {
3120         device_t                dev = adapter->dev;
3121         struct tx_ring          *txr = NULL;
3122         struct rx_ring          *rxr = NULL;
3123         int rsize, tsize, error = E1000_SUCCESS;
3124         int txconf = 0, rxconf = 0;
3125
3126
3127         /* Allocate the TX ring struct memory */
3128         if (!(adapter->tx_rings =
3129             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3130             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3131                 device_printf(dev, "Unable to allocate TX ring memory\n");
3132                 error = ENOMEM;
3133                 goto fail;
3134         }
3135
3136         /* Now allocate the RX */
3137         if (!(adapter->rx_rings =
3138             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3139             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3140                 device_printf(dev, "Unable to allocate RX ring memory\n");
3141                 error = ENOMEM;
3142                 goto rx_fail;
3143         }
3144
3145         tsize = roundup2(adapter->num_tx_desc *
3146             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3147         /*
3148          * Now set up the TX queues, txconf is needed to handle the
3149          * possibility that things fail midcourse and we need to
3150          * undo memory gracefully
3151          */ 
3152         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3153                 /* Set up some basics */
3154                 txr = &adapter->tx_rings[i];
3155                 txr->adapter = adapter;
3156                 txr->me = i;
3157
3158                 /* Initialize the TX lock */
3159                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3160                     device_get_nameunit(dev), txr->me);
3161                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3162
3163                 if (em_dma_malloc(adapter, tsize,
3164                         &txr->txdma, BUS_DMA_NOWAIT)) {
3165                         device_printf(dev,
3166                             "Unable to allocate TX Descriptor memory\n");
3167                         error = ENOMEM;
3168                         goto err_tx_desc;
3169                 }
3170                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3171                 bzero((void *)txr->tx_base, tsize);
3172
3173                 if (em_allocate_transmit_buffers(txr)) {
3174                         device_printf(dev,
3175                             "Critical Failure setting up transmit buffers\n");
3176                         error = ENOMEM;
3177                         goto err_tx_desc;
3178                 }
3179 #if __FreeBSD_version >= 800000
3180                 /* Allocate a buf ring */
3181                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3182                     M_WAITOK, &txr->tx_mtx);
3183 #endif
3184         }
3185
3186         /*
3187          * Next the RX queues...
3188          */ 
3189         rsize = roundup2(adapter->num_rx_desc *
3190             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3191         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3192                 rxr = &adapter->rx_rings[i];
3193                 rxr->adapter = adapter;
3194                 rxr->me = i;
3195
3196                 /* Initialize the RX lock */
3197                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3198                     device_get_nameunit(dev), rxr->me);
3199                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3200
3201                 if (em_dma_malloc(adapter, rsize,
3202                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3203                         device_printf(dev,
3204                             "Unable to allocate RX Descriptor memory\n");
3205                         error = ENOMEM;
3206                         goto err_rx_desc;
3207                 }
3208                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3209                 bzero((void *)rxr->rx_base, rsize);
3210
3211                 /* Allocate receive buffers for the ring */
3212                 if (em_allocate_receive_buffers(rxr)) {
3213                         device_printf(dev,
3214                             "Critical Failure setting up receive buffers\n");
3215                         error = ENOMEM;
3216                         goto err_rx_desc;
3217                 }
3218         }
3219
3220         return (0);
3221
3222 err_rx_desc:
3223         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3224                 em_dma_free(adapter, &rxr->rxdma);
3225 err_tx_desc:
3226         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3227                 em_dma_free(adapter, &txr->txdma);
3228         free(adapter->rx_rings, M_DEVBUF);
3229 rx_fail:
3230 #if __FreeBSD_version >= 800000
3231         /* txr is NULL if the TX ring array itself failed to allocate */
3231         if (txr != NULL && txr->br != NULL)
3231                 buf_ring_free(txr->br, M_DEVBUF);
3232 #endif
3233         free(adapter->tx_rings, M_DEVBUF);
3234 fail:
3235         return (error);
3236 }
3237
3238
3239 /*********************************************************************
3240  *
3241  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3242  *  the information needed to transmit a packet on the wire. This is
3243  *  called only once at attach, setup is done every reset.
3244  *
3245  **********************************************************************/
3246 static int
3247 em_allocate_transmit_buffers(struct tx_ring *txr)
3248 {
3249         struct adapter *adapter = txr->adapter;
3250         device_t dev = adapter->dev;
3251         struct em_buffer *txbuf;
3252         int error, i;
3253
3254         /*
3255          * Setup DMA descriptor areas.
3256          */
3257         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3258                                1, 0,                    /* alignment, bounds */
3259                                BUS_SPACE_MAXADDR,       /* lowaddr */
3260                                BUS_SPACE_MAXADDR,       /* highaddr */
3261                                NULL, NULL,              /* filter, filterarg */
3262                                EM_TSO_SIZE,             /* maxsize */
3263                                EM_MAX_SCATTER,          /* nsegments */
3264                                PAGE_SIZE,               /* maxsegsize */
3265                                0,                       /* flags */
3266                                NULL,                    /* lockfunc */
3267                                NULL,                    /* lockfuncarg */
3268                                &txr->txtag))) {
3269                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3270                 goto fail;
3271         }
3272
3273         if (!(txr->tx_buffers =
3274             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3275             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3276                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3277                 error = ENOMEM;
3278                 goto fail;
3279         }
3280
3281         /* Create the descriptor buffer dma maps */
3282         txbuf = txr->tx_buffers;
3283         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3284                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3285                 if (error != 0) {
3286                         device_printf(dev, "Unable to create TX DMA map\n");
3287                         goto fail;
3288                 }
3289         }
3290
3291         return 0;
3292 fail:
3293         /* We free everything; this handles the case where we failed partway */
3294         em_free_transmit_structures(adapter);
3295         return (error);
3296 }
3297
3298 /*********************************************************************
3299  *
3300  *  Initialize a transmit ring.
3301  *
3302  **********************************************************************/
3303 static void
3304 em_setup_transmit_ring(struct tx_ring *txr)
3305 {
3306         struct adapter *adapter = txr->adapter;
3307         struct em_buffer *txbuf;
3308         int i;
3309 #ifdef DEV_NETMAP
3310         struct netmap_adapter *na = NA(adapter->ifp);
3311         struct netmap_slot *slot;
3312 #endif /* DEV_NETMAP */
3313
3314         /* Clear the old descriptor contents */
3315         EM_TX_LOCK(txr);
3316 #ifdef DEV_NETMAP
3317         slot = netmap_reset(na, NR_TX, txr->me, 0);
3318 #endif /* DEV_NETMAP */
3319
3320         bzero((void *)txr->tx_base,
3321               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3322         /* Reset indices */
3323         txr->next_avail_desc = 0;
3324         txr->next_to_clean = 0;
3325
3326         /* Free any existing tx buffers. */
3327         txbuf = txr->tx_buffers;
3328         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3329                 if (txbuf->m_head != NULL) {
3330                         bus_dmamap_sync(txr->txtag, txbuf->map,
3331                             BUS_DMASYNC_POSTWRITE);
3332                         bus_dmamap_unload(txr->txtag, txbuf->map);
3333                         m_freem(txbuf->m_head);
3334                         txbuf->m_head = NULL;
3335                 }
3336 #ifdef DEV_NETMAP
3337                 if (slot) {
3338                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3339                         uint64_t paddr;
3340                         void *addr;
3341
3342                         addr = PNMB(slot + si, &paddr);
3343                         txr->tx_base[i].buffer_addr = htole64(paddr);
3344                         /* reload the map for netmap mode */
3345                         netmap_load_map(txr->txtag, txbuf->map, addr);
3346                 }
3347 #endif /* DEV_NETMAP */
3348
3349                 /* clear the watch index */
3350                 txbuf->next_eop = -1;
3351         }
3352
3353         /* Set number of descriptors available */
3354         txr->tx_avail = adapter->num_tx_desc;
3355         txr->queue_status = EM_QUEUE_IDLE;
3356
3357         /* Clear checksum offload context. */
3358         txr->last_hw_offload = 0;
3359         txr->last_hw_ipcss = 0;
3360         txr->last_hw_ipcso = 0;
3361         txr->last_hw_tucss = 0;
3362         txr->last_hw_tucso = 0;
3363
3364         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3365             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3366         EM_TX_UNLOCK(txr);
3367 }
3368
3369 /*********************************************************************
3370  *
3371  *  Initialize all transmit rings.
3372  *
3373  **********************************************************************/
3374 static void
3375 em_setup_transmit_structures(struct adapter *adapter)
3376 {
3377         struct tx_ring *txr = adapter->tx_rings;
3378
3379         for (int i = 0; i < adapter->num_queues; i++, txr++)
3380                 em_setup_transmit_ring(txr);
3381
3382         return;
3383 }
3384
3385 /*********************************************************************
3386  *
3387  *  Enable transmit unit.
3388  *
3389  **********************************************************************/
3390 static void
3391 em_initialize_transmit_unit(struct adapter *adapter)
3392 {
3393         struct tx_ring  *txr = adapter->tx_rings;
3394         struct e1000_hw *hw = &adapter->hw;
3395         u32     tctl, tarc, tipg = 0;
3396
3397         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3398
3399         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3400                 u64 bus_addr = txr->txdma.dma_paddr;
3401                 /* Base and Len of TX Ring */
3402                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3403                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3404                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3405                     (u32)(bus_addr >> 32));
3406                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3407                     (u32)bus_addr);
3408                 /* Init the HEAD/TAIL indices */
3409                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3410                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3411
3412                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3413                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3414                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3415
3416                 txr->queue_status = EM_QUEUE_IDLE;
3417         }
3418
3419         /* Set the default values for the Tx Inter Packet Gap timer */
3420         switch (adapter->hw.mac.type) {
3421         case e1000_80003es2lan:
3422                 tipg = DEFAULT_82543_TIPG_IPGR1;
3423                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3424                     E1000_TIPG_IPGR2_SHIFT;
3425                 break;
3426         default:
3427                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3428                     (adapter->hw.phy.media_type ==
3429                     e1000_media_type_internal_serdes))
3430                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3431                 else
3432                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3433                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3434                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3435         }
3436
3437         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3438         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3439
3440         if (adapter->hw.mac.type >= e1000_82540)
3441                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3442                     adapter->tx_abs_int_delay.value);
3443
3444         if ((adapter->hw.mac.type == e1000_82571) ||
3445             (adapter->hw.mac.type == e1000_82572)) {
3446                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3447                 tarc |= SPEED_MODE_BIT;
3448                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3449         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3450                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3451                 tarc |= 1;
3452                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3453                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3454                 tarc |= 1;
3455                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3456         }
3457
3458         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3459         if (adapter->tx_int_delay.value > 0)
3460                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3461
3462         /* Program the Transmit Control Register */
3463         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3464         tctl &= ~E1000_TCTL_CT;
3465         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3466                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3467
3468         if (adapter->hw.mac.type >= e1000_82571)
3469                 tctl |= E1000_TCTL_MULR;
3470
3471         /* This write will effectively turn on the transmit unit. */
3472         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3473
3474 }
3475
3476
3477 /*********************************************************************
3478  *
3479  *  Free all transmit rings.
3480  *
3481  **********************************************************************/
3482 static void
3483 em_free_transmit_structures(struct adapter *adapter)
3484 {
3485         struct tx_ring *txr = adapter->tx_rings;
3486
3487         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3488                 EM_TX_LOCK(txr);
3489                 em_free_transmit_buffers(txr);
3490                 em_dma_free(adapter, &txr->txdma);
3491                 EM_TX_UNLOCK(txr);
3492                 EM_TX_LOCK_DESTROY(txr);
3493         }
3494
3495         free(adapter->tx_rings, M_DEVBUF);
3496 }
3497
3498 /*********************************************************************
3499  *
3500  *  Free transmit ring related data structures.
3501  *
3502  **********************************************************************/
3503 static void
3504 em_free_transmit_buffers(struct tx_ring *txr)
3505 {
3506         struct adapter          *adapter = txr->adapter;
3507         struct em_buffer        *txbuf;
3508
3509         INIT_DEBUGOUT("free_transmit_ring: begin");
3510
3511         if (txr->tx_buffers == NULL)
3512                 return;
3513
3514         for (int i = 0; i < adapter->num_tx_desc; i++) {
3515                 txbuf = &txr->tx_buffers[i];
3516                 if (txbuf->m_head != NULL) {
3517                         bus_dmamap_sync(txr->txtag, txbuf->map,
3518                             BUS_DMASYNC_POSTWRITE);
3519                         bus_dmamap_unload(txr->txtag,
3520                             txbuf->map);
3521                         m_freem(txbuf->m_head);
3522                         txbuf->m_head = NULL;
3523                         if (txbuf->map != NULL) {
3524                                 bus_dmamap_destroy(txr->txtag,
3525                                     txbuf->map);
3526                                 txbuf->map = NULL;
3527                         }
3528                 } else if (txbuf->map != NULL) {
3529                         bus_dmamap_unload(txr->txtag,
3530                             txbuf->map);
3531                         bus_dmamap_destroy(txr->txtag,
3532                             txbuf->map);
3533                         txbuf->map = NULL;
3534                 }
3535         }
3536 #if __FreeBSD_version >= 800000
3537         if (txr->br != NULL)
3538                 buf_ring_free(txr->br, M_DEVBUF);
3539 #endif
3540         if (txr->tx_buffers != NULL) {
3541                 free(txr->tx_buffers, M_DEVBUF);
3542                 txr->tx_buffers = NULL;
3543         }
3544         if (txr->txtag != NULL) {
3545                 bus_dma_tag_destroy(txr->txtag);
3546                 txr->txtag = NULL;
3547         }
3548         return;
3549 }
3550
3551
3552 /*********************************************************************
3553  *  The offload context is protocol specific (TCP/UDP) and thus
3554  *  only needs to be set when the protocol changes. A context
3555  *  change can be a performance detriment, however, and it
3556  *  might be better just disabled. The reason arises in the way
3557  *  in which the controller supports pipelined requests from the
3558  *  Tx data DMA. Up to four requests can be pipelined, and they may
3559  *  belong to the same packet or to multiple packets. However all
3560  *  requests for one packet are issued before a request is issued
3561  *  for a subsequent packet and if a request for the next packet
3562  *  requires a context change, that request will be stalled
3563  *  until the previous request completes. This means setting up
3564  *  a new context effectively disables pipelined Tx data DMA, which
3565  *  in turn greatly slows down performance when sending small-sized
3566  *  frames.
3567  **********************************************************************/
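/*
 * Editorial illustration of the reuse check below (traffic mix assumed):
 * a burst of identical TCP/IPv4 frames programs one context descriptor
 * for the first frame and returns early for the rest, while traffic that
 * alternates TCP and UDP forces a fresh context descriptor, and the
 * pipeline stall described above, on every frame.
 */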
3568 static void
3569 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3570     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3571 {
3572         struct adapter                  *adapter = txr->adapter;
3573         struct e1000_context_desc       *TXD = NULL;
3574         struct em_buffer                *tx_buffer;
3575         int                             cur, hdr_len;
3576         u32                             cmd = 0;
3577         u16                             offload = 0;
3578         u8                              ipcso, ipcss, tucso, tucss;
3579
3580         ipcss = ipcso = tucss = tucso = 0;
3581         hdr_len = ip_off + (ip->ip_hl << 2);
3582         cur = txr->next_avail_desc;
3583
3584         /* Setup of IP header checksum. */
3585         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3586                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3587                 offload |= CSUM_IP;
3588                 ipcss = ip_off;
3589                 ipcso = ip_off + offsetof(struct ip, ip_sum);
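                /*
                 * e.g. (assumed untagged frame) ip_off = 14: ipcss = 14
                 * and ipcso = 14 + 10 = 24, since ip_sum sits 10 bytes
                 * into the IP header.
                 */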
3590                 /*
3591                  * Start offset for header checksum calculation.
3592                  * End offset for header checksum calculation.
3593                  * Offset of place to put the checksum.
3594                  */
3595                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3596                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3597                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3598                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3599                 cmd |= E1000_TXD_CMD_IP;
3600         }
3601
3602         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3603                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3604                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3605                 offload |= CSUM_TCP;
3606                 tucss = hdr_len;
3607                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
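                /*
                 * e.g. hdr_len = 34 (assumed 14-byte Ethernet + 20-byte
                 * IP): tucss = 34 and tucso = 34 + 16 = 50, since th_sum
                 * sits 16 bytes into the TCP header.
                 */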
3608                 /*
3609                  * Setting up a new checksum offload context for every frame
3610                  * takes a lot of processing time for the hardware. This also
3611                  * reduces performance a lot for small-sized frames, so avoid
3612                  * it if the driver can reuse a previously configured checksum
3613                  * offload context.
3614                  */
3615                 if (txr->last_hw_offload == offload) {
3616                         if (offload & CSUM_IP) {
3617                                 if (txr->last_hw_ipcss == ipcss &&
3618                                     txr->last_hw_ipcso == ipcso &&
3619                                     txr->last_hw_tucss == tucss &&
3620                                     txr->last_hw_tucso == tucso)
3621                                         return;
3622                         } else {
3623                                 if (txr->last_hw_tucss == tucss &&
3624                                     txr->last_hw_tucso == tucso)
3625                                         return;
3626                         }
3627                 }
3628                 txr->last_hw_offload = offload;
3629                 txr->last_hw_tucss = tucss;
3630                 txr->last_hw_tucso = tucso;
3631                 /*
3632                  * Start offset for payload checksum calculation.
3633                  * End offset for payload checksum calculation.
3634                  * Offset of place to put the checksum.
3635                  */
3636                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3637                 TXD->upper_setup.tcp_fields.tucss = tucss;
3638                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3639                 TXD->upper_setup.tcp_fields.tucso = tucso;
3640                 cmd |= E1000_TXD_CMD_TCP;
3641         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3642                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3643                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3644                 tucss = hdr_len;
3645                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3646                 /*
3647                  * Setting up a new checksum offload context for every frame
3648                  * takes a lot of processing time for the hardware. This also
3649                  * reduces performance a lot for small-sized frames, so avoid
3650                  * it if the driver can reuse a previously configured checksum
3651                  * offload context.
3652                  */
3653                 if (txr->last_hw_offload == offload) {
3654                         if (offload & CSUM_IP) {
3655                                 if (txr->last_hw_ipcss == ipcss &&
3656                                     txr->last_hw_ipcso == ipcso &&
3657                                     txr->last_hw_tucss == tucss &&
3658                                     txr->last_hw_tucso == tucso)
3659                                         return;
3660                         } else {
3661                                 if (txr->last_hw_tucss == tucss &&
3662                                     txr->last_hw_tucso == tucso)
3663                                         return;
3664                         }
3665                 }
3666                 txr->last_hw_offload = offload;
3667                 txr->last_hw_tucss = tucss;
3668                 txr->last_hw_tucso = tucso;
3669                 /*
3670                  * Start offset for payload checksum calculation.
3671                  * End offset for payload checksum calculation.
3672                  * Offset of place to put the checksum.
3673                  */
3674                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3675                 TXD->upper_setup.tcp_fields.tucss = tucss;
3676                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3677                 TXD->upper_setup.tcp_fields.tucso = tucso;
3678         }
3679   
3680         if (offload & CSUM_IP) {
3681                 txr->last_hw_ipcss = ipcss;
3682                 txr->last_hw_ipcso = ipcso;
3683         }
3684
3685         TXD->tcp_seg_setup.data = htole32(0);
3686         TXD->cmd_and_length =
3687             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3688         tx_buffer = &txr->tx_buffers[cur];
3689         tx_buffer->m_head = NULL;
3690         tx_buffer->next_eop = -1;
3691
3692         if (++cur == adapter->num_tx_desc)
3693                 cur = 0;
3694
3695         txr->tx_avail--;
3696         txr->next_avail_desc = cur;
3697 }
3698
3699
3700 /**********************************************************************
3701  *
3702  *  Setup work for hardware segmentation offload (TSO)
3703  *
3704  **********************************************************************/
3705 static void
3706 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3707     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3708 {
3709         struct adapter                  *adapter = txr->adapter;
3710         struct e1000_context_desc       *TXD;
3711         struct em_buffer                *tx_buffer;
3712         int cur, hdr_len;
3713
3714         /*
3715          * In theory we can use the same TSO context if and only if
3716          * the frame is the same type (IP/TCP) and has the same MSS. However,
3717          * checking whether a frame has the same IP/TCP structure is a
3718          * hard thing, so just ignore that and always reestablish a
3719          * new TSO context.
3720          */
3721         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
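        /*
         * e.g. (assumed option-less headers) ip_off = 14, ip_hl = 5 and
         * th_off = 5: hdr_len = 14 + 20 + 20 = 54 bytes of headers ahead
         * of the TCP payload.
         */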
3722         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3723                       E1000_TXD_DTYP_D |        /* Data descr type */
3724                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3725
3726         /* IP and/or TCP header checksum calculation and insertion. */
3727         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3728
3729         cur = txr->next_avail_desc;
3730         tx_buffer = &txr->tx_buffers[cur];
3731         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3732
3733         /*
3734          * Start offset for header checksum calculation.
3735          * End offset for header checksum calculation.
3736          * Offset of place to put the checksum.
3737          */
3738         TXD->lower_setup.ip_fields.ipcss = ip_off;
3739         TXD->lower_setup.ip_fields.ipcse =
3740             htole16(ip_off + (ip->ip_hl << 2) - 1);
3741         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3742         /*
3743          * Start offset for payload checksum calculation.
3744          * End offset for payload checksum calculation.
3745          * Offset of place to put the checksum.
3746          */
3747         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3748         TXD->upper_setup.tcp_fields.tucse = 0;
3749         TXD->upper_setup.tcp_fields.tucso =
3750             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3751         /*
3752          * Payload size per packet w/o any headers.
3753          * Length of all headers up to payload.
3754          */
3755         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3756         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3757
3758         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3759                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3760                                 E1000_TXD_CMD_TSE |     /* TSE context */
3761                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3762                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3763                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
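        /*
         * e.g. a 7554-byte TSO frame with hdr_len = 54 programs 7500
         * payload bytes here (figures assumed); with an MSS of 1460 the
         * MAC emits ceil(7500 / 1460) = 6 wire segments, replicating
         * the headers on each.
         */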
3764
3765         tx_buffer->m_head = NULL;
3766         tx_buffer->next_eop = -1;
3767
3768         if (++cur == adapter->num_tx_desc)
3769                 cur = 0;
3770
3771         txr->tx_avail--;
3772         txr->next_avail_desc = cur;
3773         txr->tx_tso = TRUE;
3774 }
3775
3776
3777 /**********************************************************************
3778  *
3779  *  Examine each tx_buffer in the used queue. If the hardware is done
3780  *  processing the packet then free associated resources. The
3781  *  tx_buffer is put back on the free queue.
3782  *
3783  **********************************************************************/
3784 static void
3785 em_txeof(struct tx_ring *txr)
3786 {
3787         struct adapter  *adapter = txr->adapter;
3788         int first, last, done, processed;
3789         struct em_buffer *tx_buffer;
3790         struct e1000_tx_desc   *tx_desc, *eop_desc;
3791         struct ifnet   *ifp = adapter->ifp;
3792
3793         EM_TX_LOCK_ASSERT(txr);
3794 #ifdef DEV_NETMAP
3795         if (ifp->if_capenable & IFCAP_NETMAP) {
3796                 struct netmap_adapter *na = NA(ifp);
3797
3798                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3799                 EM_TX_UNLOCK(txr);
3800                 EM_CORE_LOCK(adapter);
3801                 selwakeuppri(&na->tx_si, PI_NET);
3802                 EM_CORE_UNLOCK(adapter);
3803                 EM_TX_LOCK(txr);
3804                 return;
3805         }
3806 #endif /* DEV_NETMAP */
3807
3808         /* No work, make sure watchdog is off */
3809         if (txr->tx_avail == adapter->num_tx_desc) {
3810                 txr->queue_status = EM_QUEUE_IDLE;
3811                 return;
3812         }
3813
3814         processed = 0;
3815         first = txr->next_to_clean;
3816         tx_desc = &txr->tx_base[first];
3817         tx_buffer = &txr->tx_buffers[first];
3818         last = tx_buffer->next_eop;
3819         eop_desc = &txr->tx_base[last];
3820
3821         /*
3822          * Get the index of the first descriptor
3823          * AFTER the EOP of the first packet, so
3824          * that we can do a simple comparison in
3825          * the inner while loop.
3826          */
3827         if (++last == adapter->num_tx_desc)
3828                 last = 0;
3829         done = last;
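        /*
         * e.g. with 1024 descriptors and the EOP in slot 1023, done
         * wraps to 0; the inner loop below cleans first = ..., 1022,
         * 1023 and stops once first itself wraps to 0.
         */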
3830
3831         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3832             BUS_DMASYNC_POSTREAD);
3833
3834         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3835                 /* We clean the range of the packet */
3836                 while (first != done) {
3837                         tx_desc->upper.data = 0;
3838                         tx_desc->lower.data = 0;
3839                         tx_desc->buffer_addr = 0;
3840                         ++txr->tx_avail;
3841                         ++processed;
3842
3843                         if (tx_buffer->m_head) {
3844                                 bus_dmamap_sync(txr->txtag,
3845                                     tx_buffer->map,
3846                                     BUS_DMASYNC_POSTWRITE);
3847                                 bus_dmamap_unload(txr->txtag,
3848                                     tx_buffer->map);
3849                                 m_freem(tx_buffer->m_head);
3850                                 tx_buffer->m_head = NULL;
3851                         }
3852                         tx_buffer->next_eop = -1;
3853                         txr->watchdog_time = ticks;
3854
3855                         if (++first == adapter->num_tx_desc)
3856                                 first = 0;
3857
3858                         tx_buffer = &txr->tx_buffers[first];
3859                         tx_desc = &txr->tx_base[first];
3860                 }
3861                 ++ifp->if_opackets;
3862                 /* See if we can continue to the next packet */
3863                 last = tx_buffer->next_eop;
3864                 if (last != -1) {
3865                         eop_desc = &txr->tx_base[last];
3866                         /* Get new done point */
3867                         if (++last == adapter->num_tx_desc) last = 0;
3868                         done = last;
3869                 } else
3870                         break;
3871         }
3872         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3873             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3874
3875         txr->next_to_clean = first;
3876
3877         /*
3878         ** Watchdog calculation: we know there's
3879         ** work outstanding or the first return
3880         ** would have been taken, so nothing processed
3881         ** for too long indicates a hang. The local timer
3882         ** will examine this and do a reset if needed.
3883         */
3884         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3885                 txr->queue_status = EM_QUEUE_HUNG;
3886
3887         /*
3888          * If we have a minimum free, clear IFF_DRV_OACTIVE
3889          * to tell the stack that it is OK to send packets.
3890          * Notice that all writes of OACTIVE happen under the
3891          * TX lock which, with a single queue, guarantees 
3892          * sanity.
3893          */
3894         if (txr->tx_avail >= EM_MAX_SCATTER)
3895                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3896
3897         /* Disable watchdog if all clean */
3898         if (txr->tx_avail == adapter->num_tx_desc) {
3899                 txr->queue_status = EM_QUEUE_IDLE;
3900         } 
3901 }
3902
3903
3904 /*********************************************************************
3905  *
3906  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3907  *
3908  **********************************************************************/
3909 static void
3910 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3911 {
3912         struct adapter          *adapter = rxr->adapter;
3913         struct mbuf             *m;
3914         bus_dma_segment_t       segs[1];
3915         struct em_buffer        *rxbuf;
3916         int                     i, j, error, nsegs;
3917         bool                    cleaned = FALSE;
3918
3919         i = j = rxr->next_to_refresh;
3920         /*
3921         ** Get one descriptor beyond
3922         ** our work mark to control
3923         ** the loop.
3924         */
3925         if (++j == adapter->num_rx_desc)
3926                 j = 0;
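        /*
         * e.g. with 256 descriptors and next_to_refresh = 255, i starts
         * at 255 and j wraps to 0; the loop below then refreshes forward
         * until j catches up with the caller-supplied limit.
         */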
3927
3928         while (j != limit) {
3929                 rxbuf = &rxr->rx_buffers[i];
3930                 if (rxbuf->m_head == NULL) {
3931                         m = m_getjcl(M_DONTWAIT, MT_DATA,
3932                             M_PKTHDR, adapter->rx_mbuf_sz);
3933                         /*
3934                         ** If we have a temporary resource shortage
3935                         ** that causes a failure, just abort refresh
3936                         ** for now, we will return to this point when
3937                         ** reinvoked from em_rxeof.
3938                         */
3939                         if (m == NULL)
3940                                 goto update;
3941                 } else
3942                         m = rxbuf->m_head;
3943
3944                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3945                 m->m_flags |= M_PKTHDR;
3946                 m->m_data = m->m_ext.ext_buf;
3947
3948                 /* Use bus_dma machinery to setup the memory mapping  */
3949                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3950                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3951                 if (error != 0) {
3952                         printf("Refresh mbufs: hdr dmamap load"
3953                             " failure - %d\n", error);
3954                         m_free(m);
3955                         rxbuf->m_head = NULL;
3956                         goto update;
3957                 }
3958                 rxbuf->m_head = m;
3959                 bus_dmamap_sync(rxr->rxtag,
3960                     rxbuf->map, BUS_DMASYNC_PREREAD);
3961                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3962                 cleaned = TRUE;
3963
3964                 i = j; /* Next is precalculated for us */
3965                 rxr->next_to_refresh = i;
3966                 /* Calculate next controlling index */
3967                 if (++j == adapter->num_rx_desc)
3968                         j = 0;
3969         }
3970 update:
3971         /*
3972         ** Update the tail pointer only if,
3973         ** and only as far as, we have refreshed.
3974         */
3975         if (cleaned)
3976                 E1000_WRITE_REG(&adapter->hw,
3977                     E1000_RDT(rxr->me), rxr->next_to_refresh);
3978
3979         return;
3980 }
3981
3982
3983 /*********************************************************************
3984  *
3985  *  Allocate memory for rx_buffer structures. Since we use one
3986  *  rx_buffer per received packet, the maximum number of rx_buffer's
3987  *  that we'll need is equal to the number of receive descriptors
3988  *  that we've allocated.
3989  *
3990  **********************************************************************/
3991 static int
3992 em_allocate_receive_buffers(struct rx_ring *rxr)
3993 {
3994         struct adapter          *adapter = rxr->adapter;
3995         device_t                dev = adapter->dev;
3996         struct em_buffer        *rxbuf;
3997         int                     error;
3998
3999         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4000             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4001         if (rxr->rx_buffers == NULL) {
4002                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4003                 return (ENOMEM);
4004         }
4005
4006         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4007                                 1, 0,                   /* alignment, bounds */
4008                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4009                                 BUS_SPACE_MAXADDR,      /* highaddr */
4010                                 NULL, NULL,             /* filter, filterarg */
4011                                 MJUM9BYTES,             /* maxsize */
4012                                 1,                      /* nsegments */
4013                                 MJUM9BYTES,             /* maxsegsize */
4014                                 0,                      /* flags */
4015                                 NULL,                   /* lockfunc */
4016                                 NULL,                   /* lockarg */
4017                                 &rxr->rxtag);
4018         if (error) {
4019                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4020                     __func__, error);
4021                 goto fail;
4022         }
4023
4024         rxbuf = rxr->rx_buffers;
4025         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4027                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4028                     &rxbuf->map);
4029                 if (error) {
4030                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4031                             __func__, error);
4032                         goto fail;
4033                 }
4034         }
4035
4036         return (0);
4037
4038 fail:
4039         em_free_receive_structures(adapter);
4040         return (error);
4041 }
4042
4043
4044 /*********************************************************************
4045  *
4046  *  Initialize a receive ring and its buffers.
4047  *
4048  **********************************************************************/
4049 static int
4050 em_setup_receive_ring(struct rx_ring *rxr)
4051 {
4052         struct  adapter         *adapter = rxr->adapter;
4053         struct em_buffer        *rxbuf;
4054         bus_dma_segment_t       seg[1];
4055         int                     rsize, nsegs, error = 0;
4056 #ifdef DEV_NETMAP
4057         struct netmap_adapter *na = NA(adapter->ifp);
4058         struct netmap_slot *slot;
4059 #endif
4060
4061
4062         /* Clear the ring contents */
4063         EM_RX_LOCK(rxr);
4064         rsize = roundup2(adapter->num_rx_desc *
4065             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4066         bzero((void *)rxr->rx_base, rsize);
4067 #ifdef DEV_NETMAP
4068         slot = netmap_reset(na, NR_RX, 0, 0);
4069 #endif
4070
4071         /*
4072         ** Free current RX buffer structs and their mbufs
4073         */
4074         for (int i = 0; i < adapter->num_rx_desc; i++) {
4075                 rxbuf = &rxr->rx_buffers[i];
4076                 if (rxbuf->m_head != NULL) {
4077                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4078                             BUS_DMASYNC_POSTREAD);
4079                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4080                         m_freem(rxbuf->m_head);
4081                         rxbuf->m_head = NULL; /* mark as freed */
4082                 }
4083         }
4084
4085         /* Now replenish the mbufs */
4086         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4087                 rxbuf = &rxr->rx_buffers[j];
4088 #ifdef DEV_NETMAP
4089                 if (slot) {
4090                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4091                         uint64_t paddr;
4092                         void *addr;
4093
4094                         addr = PNMB(slot + si, &paddr);
4095                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4096                         /* Update descriptor */
4097                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4098                         continue;
4099                 }
4100 #endif /* DEV_NETMAP */
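                /*
                ** m_getjcl(9) returns a packet-header mbuf backed by a
                ** cluster of the requested size (2K, 4K or 9K here,
                ** depending on rx_mbuf_sz).
                */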
4101                 rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4102                     M_PKTHDR, adapter->rx_mbuf_sz);
4103                 if (rxbuf->m_head == NULL) {
4104                         error = ENOBUFS;
4105                         goto fail;
4106                 }
4107                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4108                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4109                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4110
4111                 /* Get the memory mapping */
4112                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4113                     rxbuf->map, rxbuf->m_head, seg,
4114                     &nsegs, BUS_DMA_NOWAIT);
4115                 if (error != 0) {
4116                         m_freem(rxbuf->m_head);
4117                         rxbuf->m_head = NULL;
4118                         goto fail;
4119                 }
4120                 bus_dmamap_sync(rxr->rxtag,
4121                     rxbuf->map, BUS_DMASYNC_PREREAD);
4122
4123                 /* Update descriptor */
4124                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4125         }
4126         rxr->next_to_check = 0;
4127         rxr->next_to_refresh = 0;
4128         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4129             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4130
4131 fail:
4132         EM_RX_UNLOCK(rxr);
4133         return (error);
4134 }
4135
4136 /*********************************************************************
4137  *
4138  *  Initialize all receive rings.
4139  *
4140  **********************************************************************/
4141 static int
4142 em_setup_receive_structures(struct adapter *adapter)
4143 {
4144         struct rx_ring *rxr = adapter->rx_rings;
4145         int q;
4146
4147         for (q = 0; q < adapter->num_queues; q++, rxr++)
4148                 if (em_setup_receive_ring(rxr))
4149                         goto fail;
4150
4151         return (0);
4152 fail:
4153         /*
4154          * Free the RX buffers allocated so far; we only handle
4155          * the rings that completed, as the failing ring has
4156          * cleaned up after itself. 'q' failed, so it's the terminus.
4157          */
4158         for (int i = 0; i < q; ++i) {
4159                 rxr = &adapter->rx_rings[i];
4160                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4161                         struct em_buffer *rxbuf;
4162                         rxbuf = &rxr->rx_buffers[n];
4163                         if (rxbuf->m_head != NULL) {
4164                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4165                                   BUS_DMASYNC_POSTREAD);
4166                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4167                                 m_freem(rxbuf->m_head);
4168                                 rxbuf->m_head = NULL;
4169                         }
4170                 }
4171                 rxr->next_to_check = 0;
4172                 rxr->next_to_refresh = 0;
4173         }
4174
4175         return (ENOBUFS);
4176 }
4177
4178 /*********************************************************************
4179  *
4180  *  Free all receive rings.
4181  *
4182  **********************************************************************/
4183 static void
4184 em_free_receive_structures(struct adapter *adapter)
4185 {
4186         struct rx_ring *rxr = adapter->rx_rings;
4187
4188         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4189                 em_free_receive_buffers(rxr);
4190                 /* Free the ring memory as well */
4191                 em_dma_free(adapter, &rxr->rxdma);
4192                 EM_RX_LOCK_DESTROY(rxr);
4193         }
4194
4195         free(adapter->rx_rings, M_DEVBUF);
4196 }
4197
4198
4199 /*********************************************************************
4200  *
4201  *  Free receive ring data structures
4202  *
4203  **********************************************************************/
4204 static void
4205 em_free_receive_buffers(struct rx_ring *rxr)
4206 {
4207         struct adapter          *adapter = rxr->adapter;
4208         struct em_buffer        *rxbuf = NULL;
4209
4210         INIT_DEBUGOUT("free_receive_buffers: begin");
4211
4212         if (rxr->rx_buffers != NULL) {
4213                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4214                         rxbuf = &rxr->rx_buffers[i];
4215                         if (rxbuf->map != NULL) {
4216                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4217                                     BUS_DMASYNC_POSTREAD);
4218                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4219                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4220                         }
4221                         if (rxbuf->m_head != NULL) {
4222                                 m_freem(rxbuf->m_head);
4223                                 rxbuf->m_head = NULL;
4224                         }
4225                 }
4226                 free(rxr->rx_buffers, M_DEVBUF);
4227                 rxr->rx_buffers = NULL;
4228                 rxr->next_to_check = 0;
4229                 rxr->next_to_refresh = 0;
4230         }
4231
4232         if (rxr->rxtag != NULL) {
4233                 bus_dma_tag_destroy(rxr->rxtag);
4234                 rxr->rxtag = NULL;
4235         }
4236
4237         return;
4238 }
4239
4240
4241 /*********************************************************************
4242  *
4243  *  Enable receive unit.
4244  *
4245  **********************************************************************/
4246 #define MAX_INTS_PER_SEC        8000
4247 #define DEFAULT_ITR          (1000000000/(MAX_INTS_PER_SEC * 256))
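/*
 * The ITR register counts in 256ns units, so with MAX_INTS_PER_SEC
 * of 8000 this works out to 10^9 / (8000 * 256) ~= 488, i.e. at
 * most one interrupt every ~125us.
 */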
4248
4249 static void
4250 em_initialize_receive_unit(struct adapter *adapter)
4251 {
4252         struct rx_ring  *rxr = adapter->rx_rings;
4253         struct ifnet    *ifp = adapter->ifp;
4254         struct e1000_hw *hw = &adapter->hw;
4255         u64     bus_addr;
4256         u32     rctl, rxcsum;
4257
4258         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4259
4260         /*
4261          * Make sure receives are disabled while setting
4262          * up the descriptor ring
4263          */
4264         rctl = E1000_READ_REG(hw, E1000_RCTL);
4265         /* 82574/82583: never clear RCTL_EN once RX has been enabled */
4266         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4267                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4268
4269         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4270             adapter->rx_abs_int_delay.value);
4271         /*
4272          * Set the interrupt throttling rate. Value is calculated
4273          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4274          */
4275         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4276
4277         /*
4278         ** When using MSIX interrupts we need to throttle
4279         ** using the EITR register (82574 only)
4280         */
4281         if (hw->mac.type == e1000_82574) {
4282                 for (int i = 0; i < 4; i++)
4283                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4284                             DEFAULT_ITR);
4285                 /* Disable accelerated acknowledge */
4286                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4287         }
4288
4289         if (ifp->if_capenable & IFCAP_RXCSUM) {
4290                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4291                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4292                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4293         }
4294
4295         /*
4296         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4297         ** long latencies are observed, like Lenovo X60. This
4298         ** change eliminates the problem, but since having positive
4299         ** values in RDTR is a known source of problems on other
4300         ** platforms another solution is being sought.
4301         */
4302         if (hw->mac.type == e1000_82573)
4303                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4304
4305         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4306                 /* Setup the Base and Length of the Rx Descriptor Ring */
4307                 bus_addr = rxr->rxdma.dma_paddr;
4308                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4309                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4310                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4311                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4312                 /* Setup the Head and Tail Descriptor Pointers */
4313                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4314 #ifdef DEV_NETMAP
4315                 /*
4316                  * an init() while a netmap client is active must
4317                  * preserve the rx buffers passed to userspace.
4318                  * In this driver it means we adjust RDT to
4319                  * something different from na->num_rx_desc - 1.
4320                  */
4321                 if (ifp->if_capenable & IFCAP_NETMAP) {
4322                         struct netmap_adapter *na = NA(adapter->ifp);
4323                         struct netmap_kring *kring = &na->rx_rings[i];
4324                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4325
4326                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4327                 } else
4328 #endif /* DEV_NETMAP */
4329                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4330         }
4331
4332         /* Set PTHRESH for improved jumbo performance */
4333         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4334             (adapter->hw.mac.type == e1000_pch2lan) ||
4335             (adapter->hw.mac.type == e1000_ich10lan)) &&
4336             (ifp->if_mtu > ETHERMTU)) {
4337                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
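                /*
                ** PTHRESH lives in the low bits of RXDCTL; OR-ing in 3
                ** raises the prefetch threshold for jumbo frames.
                */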
4338                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4339         }
4340                 
4341         if (adapter->hw.mac.type == e1000_pch2lan) {
4342                 if (ifp->if_mtu > ETHERMTU)
4343                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4344                 else
4345                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4346         }
4347
4348         /* Setup the Receive Control Register */
4349         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4350         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4351             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4352             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4353
4354         /* Strip the CRC */
4355         rctl |= E1000_RCTL_SECRC;
4356
4357         /* Make sure VLAN Filters are off */
4358         rctl &= ~E1000_RCTL_VFE;
4359         rctl &= ~E1000_RCTL_SBP;
4360
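        /*
        ** RCTL encodes the buffer size in two bits; BSEX (buffer size
        ** extension) scales the base encodings by 16, so for example
        ** SZ_4096 | BSEX selects 4096-byte buffers.
        */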
4361         if (adapter->rx_mbuf_sz == MCLBYTES)
4362                 rctl |= E1000_RCTL_SZ_2048;
4363         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4364                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4365         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4366                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4367
4368         if (ifp->if_mtu > ETHERMTU)
4369                 rctl |= E1000_RCTL_LPE;
4370         else
4371                 rctl &= ~E1000_RCTL_LPE;
4372
4373         /* Write out the settings */
4374         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4375
4376         return;
4377 }
4378
4379
4380 /*********************************************************************
4381  *
4382  *  This routine executes in interrupt context. It replenishes
4383  *  the mbufs in the descriptor ring and passes data that has
4384  *  been dma'ed into host memory up to the stack.
4385  *
4386  *  We loop at most count times if count is > 0, or until done if
4387  *  count < 0.
4388  *  
4389  *  For polling we also return the number of packets cleaned
4390  *********************************************************************/
4391 static bool
4392 em_rxeof(struct rx_ring *rxr, int count, int *done)
4393 {
4394         struct adapter          *adapter = rxr->adapter;
4395         struct ifnet            *ifp = adapter->ifp;
4396         struct mbuf             *mp, *sendmp;
4397         u8                      status = 0;
4398         u16                     len;
4399         int                     i, processed, rxdone = 0;
4400         bool                    eop;
4401         struct e1000_rx_desc    *cur;
4402
4403         EM_RX_LOCK(rxr);
4404
4405 #ifdef DEV_NETMAP
4406         if (ifp->if_capenable & IFCAP_NETMAP) {
4407                 struct netmap_adapter *na = NA(ifp);
4408
4409                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4410                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4411                 EM_RX_UNLOCK(rxr);
4412                 EM_CORE_LOCK(adapter);
4413                 selwakeuppri(&na->rx_si, PI_NET);
4414                 EM_CORE_UNLOCK(adapter);
4415                 return (0);
4416         }
4417 #endif /* DEV_NETMAP */
4418
4419         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4420
4421                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4422                         break;
4423
4424                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4425                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4426
4427                 cur = &rxr->rx_base[i];
4428                 status = cur->status;
4429                 mp = sendmp = NULL;
4430
4431                 if ((status & E1000_RXD_STAT_DD) == 0)
4432                         break;
4433
4434                 len = le16toh(cur->length);
4435                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4436
4437                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4438                     (rxr->discard == TRUE)) {
4439                         ifp->if_ierrors++;
4440                         ++rxr->rx_discarded;
4441                         if (!eop) /* Catch subsequent segs */
4442                                 rxr->discard = TRUE;
4443                         else
4444                                 rxr->discard = FALSE;
4445                         em_rx_discard(rxr, i);
4446                         goto next_desc;
4447                 }
4448
4449                 /* Assign correct length to the current fragment */
4450                 mp = rxr->rx_buffers[i].m_head;
4451                 mp->m_len = len;
4452
4453                 /* Trigger for refresh */
4454                 rxr->rx_buffers[i].m_head = NULL;
4455
4456                 /* First segment? */
4457                 if (rxr->fmp == NULL) {
4458                         mp->m_pkthdr.len = len;
4459                         rxr->fmp = rxr->lmp = mp;
4460                 } else {
4461                         /* Chain mbuf's together */
4462                         mp->m_flags &= ~M_PKTHDR;
4463                         rxr->lmp->m_next = mp;
4464                         rxr->lmp = mp;
4465                         rxr->fmp->m_pkthdr.len += len;
4466                 }
4467
4468                 if (eop) {
4469                         --count;
4470                         sendmp = rxr->fmp;
4471                         sendmp->m_pkthdr.rcvif = ifp;
4472                         ifp->if_ipackets++;
4473                         em_receive_checksum(cur, sendmp);
4474 #ifndef __NO_STRICT_ALIGNMENT
4475                         if (adapter->max_frame_size >
4476                             (MCLBYTES - ETHER_ALIGN) &&
4477                             em_fixup_rx(rxr) != 0)
4478                                 goto skip;
4479 #endif
4480                         if (status & E1000_RXD_STAT_VP) {
4481                                 sendmp->m_pkthdr.ether_vtag =
4482                                     le16toh(cur->special);
4483                                 sendmp->m_flags |= M_VLANTAG;
4484                         }
4485 #ifndef __NO_STRICT_ALIGNMENT
4486 skip:
4487 #endif
4488                         rxr->fmp = rxr->lmp = NULL;
4489                 }
4490 next_desc:
4491                 /* Zero out the receive descriptors status. */
4492                 cur->status = 0;
4493                 ++rxdone;       /* cumulative for POLL */
4494                 ++processed;
4495
4496                 /* Advance our pointers to the next descriptor. */
4497                 if (++i == adapter->num_rx_desc)
4498                         i = 0;
4499
4500                 /* Send to the stack */
4501                 if (sendmp != NULL) {
4502                         rxr->next_to_check = i;
4503                         EM_RX_UNLOCK(rxr);
4504                         (*ifp->if_input)(ifp, sendmp);
4505                         EM_RX_LOCK(rxr);
4506                         i = rxr->next_to_check;
4507                 }
4508
4509                 /* Only refresh mbufs every 8 descriptors */
4510                 if (processed == 8) {
4511                         em_refresh_mbufs(rxr, i);
4512                         processed = 0;
4513                 }
4514         }
4515
4516         /* Catch any remaining refresh work */
4517         if (e1000_rx_unrefreshed(rxr))
4518                 em_refresh_mbufs(rxr, i);
4519
4520         rxr->next_to_check = i;
4521         if (done != NULL)
4522                 *done = rxdone;
4523         EM_RX_UNLOCK(rxr);
4524
4525         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4526 }
4527
4528 static __inline void
4529 em_rx_discard(struct rx_ring *rxr, int i)
4530 {
4531         struct em_buffer        *rbuf;
4532
4533         rbuf = &rxr->rx_buffers[i];
4534         /* Free any previous pieces */
4535         if (rxr->fmp != NULL) {
4536                 rxr->fmp->m_flags |= M_PKTHDR;
4537                 m_freem(rxr->fmp);
4538                 rxr->fmp = NULL;
4539                 rxr->lmp = NULL;
4540         }
4541         /*
4542         ** Free buffer and allow em_refresh_mbufs()
4543         ** to clean up and recharge buffer.
4544         */
4545         if (rbuf->m_head) {
4546                 m_free(rbuf->m_head);
4547                 rbuf->m_head = NULL;
4548         }
4549         return;
4550 }
4551
4552 #ifndef __NO_STRICT_ALIGNMENT
4553 /*
4554  * When jumbo frames are enabled we should realign the entire payload on
4555  * architectures with strict alignment. This is a serious design mistake of
4556  * the 8254x, as it undermines the benefit of DMA. The 8254x only allows RX
4557  * buffer sizes of 2048/4096/8192/16384; what we really want is
4558  * 2048 - ETHER_ALIGN, so that the payload is aligned. On architectures
4559  * without strict alignment restrictions the 8254x still performs unaligned
4560  * memory accesses, which reduce performance as well. To avoid copying an
4561  * entire frame to realign it, we allocate a new mbuf, copy the ethernet
4562  * header into it, and prepend the new mbuf to the existing mbuf chain.
4563  *
4564  * Be aware that the best performance of the 8254x is achieved only when
4565  * jumbo frames are not used at all on architectures with strict alignment.
4566  */
4567 static int
4568 em_fixup_rx(struct rx_ring *rxr)
4569 {
4570         struct adapter *adapter = rxr->adapter;
4571         struct mbuf *m, *n;
4572         int error;
4573
4574         error = 0;
4575         m = rxr->fmp;
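        /*
        ** If the frame still fits after shifting, slide it up by
        ** ETHER_HDR_LEN (14) bytes so the IP header that follows the
        ** ethernet header lands on a 32-bit boundary.
        */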
4576         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4577                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4578                 m->m_data += ETHER_HDR_LEN;
4579         } else {
4580                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4581                 if (n != NULL) {
4582                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4583                         m->m_data += ETHER_HDR_LEN;
4584                         m->m_len -= ETHER_HDR_LEN;
4585                         n->m_len = ETHER_HDR_LEN;
4586                         M_MOVE_PKTHDR(n, m);
4587                         n->m_next = m;
4588                         rxr->fmp = n;
4589                 } else {
4590                         adapter->dropped_pkts++;
4591                         m_freem(rxr->fmp);
4592                         rxr->fmp = NULL;
4593                         error = ENOMEM;
4594                 }
4595         }
4596
4597         return (error);
4598 }
4599 #endif
4600
4601 /*********************************************************************
4602  *
4603  *  Verify that the hardware indicated that the checksum is valid.
4604  *  Inform the stack about the status of checksum so that stack
4605  *  doesn't spend time verifying the checksum.
4606  *
4607  *********************************************************************/
4608 static void
4609 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4610 {
4611         /* If the Ignore Checksum bit (IXSM) is set, report nothing */
4612         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4613                 mp->m_pkthdr.csum_flags = 0;
4614                 return;
4615         }
4616
4617         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4618                 /* Did it pass? */
4619                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4620                         /* IP Checksum Good */
4621                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4622                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4623
4624                 } else {
4625                         mp->m_pkthdr.csum_flags = 0;
4626                 }
4627         }
4628
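        /*
        ** CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data of 0xffff
        ** tells the stack the TCP/UDP checksum (pseudo-header included)
        ** has already been verified, so the software check is skipped.
        */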
4629         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4630                 /* Did it pass? */
4631                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4632                         mp->m_pkthdr.csum_flags |=
4633                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4634                         mp->m_pkthdr.csum_data = htons(0xffff);
4635                 }
4636         }
4637 }
4638
4639 /*
4640  * This routine is run via a vlan
4641  * config EVENT
4642  */
4643 static void
4644 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4645 {
4646         struct adapter  *adapter = ifp->if_softc;
4647         u32             index, bit;
4648
4649         if (ifp->if_softc !=  arg)   /* Not our event */
4650                 return;
4651
4652         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4653                 return;
4654
4655         EM_CORE_LOCK(adapter);
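        /*
        ** The 4096-bit VFTA is kept as 128 32-bit words: the upper
        ** bits of the tag select the word, the low 5 bits select the
        ** bit within it; e.g. vtag 1000 -> index 31, bit 8.
        */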
4656         index = (vtag >> 5) & 0x7F;
4657         bit = vtag & 0x1F;
4658         adapter->shadow_vfta[index] |= (1 << bit);
4659         ++adapter->num_vlans;
4660         /* Re-init to load the changes */
4661         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4662                 em_init_locked(adapter);
4663         EM_CORE_UNLOCK(adapter);
4664 }
4665
4666 /*
4667  * This routine is run via a vlan
4668  * unconfig EVENT
4669  */
4670 static void
4671 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4672 {
4673         struct adapter  *adapter = ifp->if_softc;
4674         u32             index, bit;
4675
4676         if (ifp->if_softc !=  arg)
4677                 return;
4678
4679         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4680                 return;
4681
4682         EM_CORE_LOCK(adapter);
4683         index = (vtag >> 5) & 0x7F;
4684         bit = vtag & 0x1F;
4685         adapter->shadow_vfta[index] &= ~(1 << bit);
4686         --adapter->num_vlans;
4687         /* Re-init to load the changes */
4688         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4689                 em_init_locked(adapter);
4690         EM_CORE_UNLOCK(adapter);
4691 }
4692
4693 static void
4694 em_setup_vlan_hw_support(struct adapter *adapter)
4695 {
4696         struct e1000_hw *hw = &adapter->hw;
4697         u32             reg;
4698
4699         /*
4700         ** We get here through init_locked, meaning a soft
4701         ** reset has already cleared the VFTA and other
4702         ** state, so if no vlans have been registered
4703         ** there is nothing to do.
4704         */
4705         if (adapter->num_vlans == 0)
4706                 return;
4707
4708         /*
4709         ** A soft reset zeroes out the VFTA, so
4710         ** we need to repopulate it now.
4711         */
4712         for (int i = 0; i < EM_VFTA_SIZE; i++)
4713                 if (adapter->shadow_vfta[i] != 0)
4714                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4715                             i, adapter->shadow_vfta[i]);
4716
4717         reg = E1000_READ_REG(hw, E1000_CTRL);
4718         reg |= E1000_CTRL_VME;
4719         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4720
4721         /* Enable the Filter Table */
4722         reg = E1000_READ_REG(hw, E1000_RCTL);
4723         reg &= ~E1000_RCTL_CFIEN;
4724         reg |= E1000_RCTL_VFE;
4725         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4726 }
4727
4728 static void
4729 em_enable_intr(struct adapter *adapter)
4730 {
4731         struct e1000_hw *hw = &adapter->hw;
4732         u32 ims_mask = IMS_ENABLE_MASK;
4733
4734         if (hw->mac.type == e1000_82574) {
4735                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4736                 ims_mask |= EM_MSIX_MASK;
4737         } 
4738         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4739 }
4740
4741 static void
4742 em_disable_intr(struct adapter *adapter)
4743 {
4744         struct e1000_hw *hw = &adapter->hw;
4745
4746         if (hw->mac.type == e1000_82574)
4747                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4748         E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
4749 }
4750
4751 /*
4752  * Bit of a misnomer, what this really means is
4753  * to enable OS management of the system... aka
4754  * to disable special hardware management features 
4755  */
4756 static void
4757 em_init_manageability(struct adapter *adapter)
4758 {
4759         /* A shared code workaround */
4760 #define E1000_82542_MANC2H E1000_MANC2H
4761         if (adapter->has_manage) {
4762                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4763                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4764
4765                 /* disable hardware interception of ARP */
4766                 manc &= ~(E1000_MANC_ARP_EN);
4767
4768                 /* enable receiving management packets to the host */
4769                 manc |= E1000_MANC_EN_MNG2HOST;
4770 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4771 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4772                 manc2h |= E1000_MNG2HOST_PORT_623;
4773                 manc2h |= E1000_MNG2HOST_PORT_664;
4774                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4775                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4776         }
4777 }
4778
4779 /*
4780  * Give control back to hardware management
4781  * controller if there is one.
4782  */
4783 static void
4784 em_release_manageability(struct adapter *adapter)
4785 {
4786         if (adapter->has_manage) {
4787                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4788
4789                 /* re-enable hardware interception of ARP */
4790                 manc |= E1000_MANC_ARP_EN;
4791                 manc &= ~E1000_MANC_EN_MNG2HOST;
4792
4793                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4794         }
4795 }
4796
4797 /*
4798  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4799  * For ASF and Pass Through versions of f/w this means
4800  * that the driver is loaded. For AMT version type f/w
4801  * this means that the network i/f is open.
4802  */
4803 static void
4804 em_get_hw_control(struct adapter *adapter)
4805 {
4806         u32 ctrl_ext, swsm;
4807
4808         if (adapter->hw.mac.type == e1000_82573) {
4809                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4810                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4811                     swsm | E1000_SWSM_DRV_LOAD);
4812                 return;
4813         }
4814         /* else */
4815         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4816         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4817             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4818         return;
4819 }
4820
4821 /*
4822  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4823  * For ASF and Pass Through versions of f/w this means that
4824  * the driver is no longer loaded. For AMT versions of the
4825  * f/w this means that the network i/f is closed.
4826  */
4827 static void
4828 em_release_hw_control(struct adapter *adapter)
4829 {
4830         u32 ctrl_ext, swsm;
4831
4832         if (!adapter->has_manage)
4833                 return;
4834
4835         if (adapter->hw.mac.type == e1000_82573) {
4836                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4837                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4838                     swsm & ~E1000_SWSM_DRV_LOAD);
4839                 return;
4840         }
4841         /* else */
4842         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4843         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4844             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4845         return;
4846 }
4847
4848 static int
4849 em_is_valid_ether_addr(u8 *addr)
4850 {
4851         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4852
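        /* Reject multicast/broadcast (I/G bit set) and all-zero addresses */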
4853         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4854                 return (FALSE);
4855         }
4856
4857         return (TRUE);
4858 }
4859
4860 /*
4861 ** Parse the interface capabilities with regard
4862 ** to both system management and wake-on-lan for
4863 ** later use.
4864 */
4865 static void
4866 em_get_wakeup(device_t dev)
4867 {
4868         struct adapter  *adapter = device_get_softc(dev);
4869         u16             eeprom_data = 0, device_id, apme_mask;
4870
4871         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4872         apme_mask = EM_EEPROM_APME;
4873
4874         switch (adapter->hw.mac.type) {
4875         case e1000_82573:
4876         case e1000_82583:
4877                 adapter->has_amt = TRUE;
4878                 /* Falls thru */
4879         case e1000_82571:
4880         case e1000_82572:
4881         case e1000_80003es2lan:
4882                 if (adapter->hw.bus.func == 1) {
4883                         e1000_read_nvm(&adapter->hw,
4884                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4885                         break;
4886                 } else
4887                         e1000_read_nvm(&adapter->hw,
4888                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4889                 break;
4890         case e1000_ich8lan:
4891         case e1000_ich9lan:
4892         case e1000_ich10lan:
4893         case e1000_pchlan:
4894         case e1000_pch2lan:
4895                 apme_mask = E1000_WUC_APME;
4896                 adapter->has_amt = TRUE;
4897                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4898                 break;
4899         default:
4900                 e1000_read_nvm(&adapter->hw,
4901                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4902                 break;
4903         }
4904         if (eeprom_data & apme_mask)
4905                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4906         /*
4907          * We have the eeprom settings, now apply the special cases
4908          * where the eeprom may be wrong or the board won't support
4909          * wake on lan on a particular port
4910          */
4911         device_id = pci_get_device(dev);
4912         switch (device_id) {
4913         case E1000_DEV_ID_82571EB_FIBER:
4914                 /* Wake events only supported on port A for dual fiber
4915                  * regardless of eeprom setting */
4916                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4917                     E1000_STATUS_FUNC_1)
4918                         adapter->wol = 0;
4919                 break;
4920         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4921         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4922         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4923                 /* if quad port adapter, disable WoL on all but port A */
4924                 if (global_quad_port_a != 0)
4925                         adapter->wol = 0;
4926                 /* Reset for multiple quad port adapters */
4927                 if (++global_quad_port_a == 4)
4928                         global_quad_port_a = 0;
4929                 break;
4930         }
4931         return;
4932 }
4933
4934
4935 /*
4936  * Enable PCI Wake On Lan capability
4937  */
4938 static void
4939 em_enable_wakeup(device_t dev)
4940 {
4941         struct adapter  *adapter = device_get_softc(dev);
4942         struct ifnet    *ifp = adapter->ifp;
4943         u32             pmc, ctrl, ctrl_ext, rctl;
4944         u16             status;
4945
4946         if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4947                 return;
4948
4949         /* Advertise the wakeup capability */
4950         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4951         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4952         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4953         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4954
4955         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4956             (adapter->hw.mac.type == e1000_pchlan) ||
4957             (adapter->hw.mac.type == e1000_ich9lan) ||
4958             (adapter->hw.mac.type == e1000_ich10lan))
4959                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4960
4961         /* Keep the laser running on Fiber adapters */
4962         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4963             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4964                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4965                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4966                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4967         }
4968
4969         /*
4970         ** Determine type of Wakeup: note that wol
4971         ** is set with all bits on by default.
4972         */
4973         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4974                 adapter->wol &= ~E1000_WUFC_MAG;
4975
4976         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4977                 adapter->wol &= ~E1000_WUFC_MC;
4978         else {
4979                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4980                 rctl |= E1000_RCTL_MPE;
4981                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4982         }
4983
4984         if ((adapter->hw.mac.type == e1000_pchlan) ||
4985             (adapter->hw.mac.type == e1000_pch2lan)) {
4986                 if (em_enable_phy_wakeup(adapter))
4987                         return;
4988         } else {
4989                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4990                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4991         }
4992
4993         if (adapter->hw.phy.type == e1000_phy_igp_3)
4994                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4995
4996         /* Request PME */
4997         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4998         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4999         if (ifp->if_capenable & IFCAP_WOL)
5000                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5001         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5002
5003         return;
5004 }
5005
5006 /*
5007 ** WOL in the newer chipset interfaces (pchlan)
5008 ** requires things to be copied into the PHY
5009 */
5010 static int
5011 em_enable_phy_wakeup(struct adapter *adapter)
5012 {
5013         struct e1000_hw *hw = &adapter->hw;
5014         u32 mreg, ret = 0;
5015         u16 preg;
5016
5017         /* copy MAC RARs to PHY RARs */
5018         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5019
5020         /* copy MAC MTA to PHY MTA */
5021         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5022                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5023                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5024                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5025                     (u16)((mreg >> 16) & 0xFFFF));
5026         }
5027
5028         /* configure PHY Rx Control register */
5029         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5030         mreg = E1000_READ_REG(hw, E1000_RCTL);
5031         if (mreg & E1000_RCTL_UPE)
5032                 preg |= BM_RCTL_UPE;
5033         if (mreg & E1000_RCTL_MPE)
5034                 preg |= BM_RCTL_MPE;
5035         preg &= ~(BM_RCTL_MO_MASK);
5036         if (mreg & E1000_RCTL_MO_3)
5037                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5038                                 << BM_RCTL_MO_SHIFT);
5039         if (mreg & E1000_RCTL_BAM)
5040                 preg |= BM_RCTL_BAM;
5041         if (mreg & E1000_RCTL_PMCF)
5042                 preg |= BM_RCTL_PMCF;
5043         mreg = E1000_READ_REG(hw, E1000_CTRL);
5044         if (mreg & E1000_CTRL_RFCE)
5045                 preg |= BM_RCTL_RFCE;
5046         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5047
5048         /* enable PHY wakeup in MAC register */
5049         E1000_WRITE_REG(hw, E1000_WUC,
5050             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5051         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5052
5053         /* configure and enable PHY wakeup in PHY registers */
5054         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5055         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5056
5057         /* activate PHY wakeup */
5058         ret = hw->phy.ops.acquire(hw);
5059         if (ret) {
5060                 printf("Could not acquire PHY\n");
5061                 return ret;
5062         }
5063         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5064                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5065         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5066         if (ret) {
5067                 printf("Could not read PHY page 769\n");
5068                 goto out;
5069         }
5070         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5071         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5072         if (ret)
5073                 printf("Could not set PHY Host Wakeup bit\n");
5074 out:
5075         hw->phy.ops.release(hw);
5076
5077         return ret;
5078 }
5079
5080 static void
5081 em_led_func(void *arg, int onoff)
5082 {
5083         struct adapter  *adapter = arg;
5084  
5085         EM_CORE_LOCK(adapter);
5086         if (onoff) {
5087                 e1000_setup_led(&adapter->hw);
5088                 e1000_led_on(&adapter->hw);
5089         } else {
5090                 e1000_led_off(&adapter->hw);
5091                 e1000_cleanup_led(&adapter->hw);
5092         }
5093         EM_CORE_UNLOCK(adapter);
5094 }
5095
5096 /*
5097 ** Disable the L0s and L1 ASPM link states
5098 */
5099 static void
5100 em_disable_aspm(struct adapter *adapter)
5101 {
5102         int             base, reg;
5103         u16             link_cap, link_ctrl;
5104         device_t        dev = adapter->dev;
5105
5106         switch (adapter->hw.mac.type) {
5107                 case e1000_82573:
5108                 case e1000_82574:
5109                 case e1000_82583:
5110                         break;
5111                 default:
5112                         return;
5113         }
5114         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5115                 return;
5116         reg = base + PCIER_LINK_CAP;
5117         link_cap = pci_read_config(dev, reg, 2);
5118         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5119                 return;
5120         reg = base + PCIER_LINK_CTL;
5121         link_ctrl = pci_read_config(dev, reg, 2);
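        /*
        ** The ASPM Control field is Link Control bits 1:0 (01b = L0s,
        ** 10b = L1); clearing both disables ASPM entirely.
        */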
5122         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5123         pci_write_config(dev, reg, link_ctrl, 2);
5124         return;
5125 }
5126
5127 /**********************************************************************
5128  *
5129  *  Update the board statistics counters.
5130  *
5131  **********************************************************************/
5132 static void
5133 em_update_stats_counters(struct adapter *adapter)
5134 {
5135         struct ifnet   *ifp;
5136
5137         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5138            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5139                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5140                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5141         }
5142         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5143         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5144         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5145         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5146
5147         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5148         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5149         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5150         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5151         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5152         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5153         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5154         /*
5155         ** For watchdog management we need to know if we have been
5156         ** paused during the last interval, so capture that here.
5157         */
5158         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5159         adapter->stats.xoffrxc += adapter->pause_frames;
5160         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5161         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5162         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5163         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5164         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5165         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5166         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5167         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5168         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5169         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5170         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5171         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5172
5173         /* For the 64-bit byte counters the low dword must be read first. */
5174         /* Both registers clear on the read of the high dword */
5175
5176         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5177             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5178         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5179             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5180
5181         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5182         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5183         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5184         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5185         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5186
5187         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5188         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5189
5190         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5191         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5192         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5193         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5194         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5195         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5196         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5197         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5198         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5199         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5200
5201         /* Interrupt Counts */
5202
5203         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5204         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5205         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5206         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5207         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5208         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5209         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5210         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5211         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5212
5213         if (adapter->hw.mac.type >= e1000_82543) {
5214                 adapter->stats.algnerrc +=
5215                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5216                 adapter->stats.rxerrc +=
5217                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5218                 adapter->stats.tncrs +=
5219                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5220                 adapter->stats.cexterr +=
5221                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5222                 adapter->stats.tsctc +=
5223                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5224                 adapter->stats.tsctfc +=
5225                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5226         }
5227         ifp = adapter->ifp;
5228
5229         ifp->if_collisions = adapter->stats.colc;
5230
5231         /* Rx Errors */
5232         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5233             adapter->stats.crcerrs + adapter->stats.algnerrc +
5234             adapter->stats.ruc + adapter->stats.roc +
5235             adapter->stats.mpc + adapter->stats.cexterr;
5236
5237         /* Tx Errors */
5238         ifp->if_oerrors = adapter->stats.ecol +
5239             adapter->stats.latecol + adapter->watchdog_events;
5240 }
5241
5242 /* Export a single 32-bit register via a read-only sysctl. */
5243 static int
5244 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5245 {
5246         struct adapter *adapter;
5247         u_int val;
5248
5249         adapter = oidp->oid_arg1;
5250         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5251         return (sysctl_handle_int(oidp, &val, 0, req));
5252 }
5253
5254 /*
5255  * Add sysctl variables, one per statistic, to the system.
5256  */
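/*
 * These surface under the device's sysctl tree, e.g. (unit number
 * illustrative):
 *   # sysctl dev.em.0.mac_stats.missed_packets
 */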
5257 static void
5258 em_add_hw_stats(struct adapter *adapter)
5259 {
5260         device_t dev = adapter->dev;
5261
5262         struct tx_ring *txr = adapter->tx_rings;
5263         struct rx_ring *rxr = adapter->rx_rings;
5264
5265         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5266         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5267         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5268         struct e1000_hw_stats *stats = &adapter->stats;
5269
5270         struct sysctl_oid *stat_node, *queue_node, *int_node;
5271         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5272
5273 #define QUEUE_NAME_LEN 32
5274         char namebuf[QUEUE_NAME_LEN];
5275         
5276         /* Driver Statistics */
5277         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5278                         CTLFLAG_RD, &adapter->link_irq,
5279                         "Link MSIX IRQ Handled");
5280         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5281                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5282                          "Std mbuf failed");
5283         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5284                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5285                          "Std mbuf cluster failed");
5286         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5287                         CTLFLAG_RD, &adapter->dropped_pkts,
5288                         "Driver dropped packets");
5289         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5290                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5291                         "Driver tx dma failure in xmit");
5292         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5293                         CTLFLAG_RD, &adapter->rx_overruns,
5294                         "RX overruns");
5295         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5296                         CTLFLAG_RD, &adapter->watchdog_events,
5297                         "Watchdog timeouts");
5298         
5299         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5300                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5301                         em_sysctl_reg_handler, "IU",
5302                         "Device Control Register");
5303         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5304                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5305                         em_sysctl_reg_handler, "IU",
5306                         "Receiver Control Register");
5307         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5308                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5309                         "Flow Control High Watermark");
5310         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5311                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5312                         "Flow Control Low Watermark");
5313
5314         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5315                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5316                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5317                                             CTLFLAG_RD, NULL, "Queue Name");
5318                 queue_list = SYSCTL_CHILDREN(queue_node);
5319
5320                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5321                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5322                                 E1000_TDH(txr->me),
5323                                 em_sysctl_reg_handler, "IU",
5324                                 "Transmit Descriptor Head");
5325                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5326                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5327                                 E1000_TDT(txr->me),
5328                                 em_sysctl_reg_handler, "IU",
5329                                 "Transmit Descriptor Tail");
5330                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5331                                 CTLFLAG_RD, &txr->tx_irq,
5332                                 "Queue MSI-X Transmit Interrupts");
5333                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5334                                 CTLFLAG_RD, &txr->no_desc_avail,
5335                                 "Queue No Descriptor Available");
5336                 
5337                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5338                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5339                                 E1000_RDH(rxr->me),
5340                                 em_sysctl_reg_handler, "IU",
5341                                 "Receive Descriptor Head");
5342                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5343                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5344                                 E1000_RDT(rxr->me),
5345                                 em_sysctl_reg_handler, "IU",
5346                                 "Receive Descriptor Tail");
5347                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5348                                 CTLFLAG_RD, &rxr->rx_irq,
5349                                 "Queue MSI-X Receive Interrupts");
5350         }
5351
5352         /* MAC stats get their own sub node */
5353
5354         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5355                                     CTLFLAG_RD, NULL, "Statistics");
5356         stat_list = SYSCTL_CHILDREN(stat_node);
5357
5358         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5359                         CTLFLAG_RD, &stats->ecol,
5360                         "Excessive collisions");
5361         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5362                         CTLFLAG_RD, &stats->scc,
5363                         "Single collisions");
5364         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5365                         CTLFLAG_RD, &stats->mcc,
5366                         "Multiple collisions");
5367         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5368                         CTLFLAG_RD, &stats->latecol,
5369                         "Late collisions");
5370         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5371                         CTLFLAG_RD, &stats->colc,
5372                         "Collision Count");
5373         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5374                         CTLFLAG_RD, &adapter->stats.symerrs,
5375                         "Symbol Errors");
5376         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5377                         CTLFLAG_RD, &adapter->stats.sec,
5378                         "Sequence Errors");
5379         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5380                         CTLFLAG_RD, &adapter->stats.dc,
5381                         "Defer Count");
5382         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5383                         CTLFLAG_RD, &adapter->stats.mpc,
5384                         "Missed Packets");
5385         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5386                         CTLFLAG_RD, &adapter->stats.rnbc,
5387                         "Receive No Buffers");
5388         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5389                         CTLFLAG_RD, &adapter->stats.ruc,
5390                         "Receive Undersize");
5391         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5392                         CTLFLAG_RD, &adapter->stats.rfc,
5393                         "Fragmented Packets Received");
5394         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5395                         CTLFLAG_RD, &adapter->stats.roc,
5396                         "Oversized Packets Received");
5397         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5398                         CTLFLAG_RD, &adapter->stats.rjc,
5399                         "Received Jabber");
5400         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5401                         CTLFLAG_RD, &adapter->stats.rxerrc,
5402                         "Receive Errors");
5403         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5404                         CTLFLAG_RD, &adapter->stats.crcerrs,
5405                         "CRC errors");
5406         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5407                         CTLFLAG_RD, &adapter->stats.algnerrc,
5408                         "Alignment Errors");
5409         /* On 82575 these are collision counts */
5410         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5411                         CTLFLAG_RD, &adapter->stats.cexterr,
5412                         "Collision/Carrier extension errors");
5413         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5414                         CTLFLAG_RD, &adapter->stats.xonrxc,
5415                         "XON Received");
5416         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5417                         CTLFLAG_RD, &adapter->stats.xontxc,
5418                         "XON Transmitted");
5419         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5420                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5421                         "XOFF Received");
5422         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5423                         CTLFLAG_RD, &adapter->stats.xofftxc,
5424                         "XOFF Transmitted");
5425
5426         /* Packet Reception Stats */
5427         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5428                         CTLFLAG_RD, &adapter->stats.tpr,
5429                         "Total Packets Received");
5430         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5431                         CTLFLAG_RD, &adapter->stats.gprc,
5432                         "Good Packets Received");
5433         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5434                         CTLFLAG_RD, &adapter->stats.bprc,
5435                         "Broadcast Packets Received");
5436         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5437                         CTLFLAG_RD, &adapter->stats.mprc,
5438                         "Multicast Packets Received");
5439         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5440                         CTLFLAG_RD, &adapter->stats.prc64,
5441                         "64 byte frames received");
5442         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5443                         CTLFLAG_RD, &adapter->stats.prc127,
5444                         "65-127 byte frames received");
5445         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5446                         CTLFLAG_RD, &adapter->stats.prc255,
5447                         "128-255 byte frames received");
5448         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5449                         CTLFLAG_RD, &adapter->stats.prc511,
5450                         "256-511 byte frames received");
5451         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5452                         CTLFLAG_RD, &adapter->stats.prc1023,
5453                         "512-1023 byte frames received");
5454         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5455                         CTLFLAG_RD, &adapter->stats.prc1522,
5456                         "1024-1522 byte frames received");
5457         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5458                         CTLFLAG_RD, &adapter->stats.gorc, 
5459                         "Good Octets Received"); 
5460
5461         /* Packet Transmission Stats */
5462         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5463                         CTLFLAG_RD, &adapter->stats.gotc, 
5464                         "Good Octets Transmitted"); 
5465         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5466                         CTLFLAG_RD, &adapter->stats.tpt,
5467                         "Total Packets Transmitted");
5468         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5469                         CTLFLAG_RD, &adapter->stats.gptc,
5470                         "Good Packets Transmitted");
5471         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5472                         CTLFLAG_RD, &adapter->stats.bptc,
5473                         "Broadcast Packets Transmitted");
5474         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5475                         CTLFLAG_RD, &adapter->stats.mptc,
5476                         "Multicast Packets Transmitted");
5477         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5478                         CTLFLAG_RD, &adapter->stats.ptc64,
5479                         "64 byte frames transmitted");
5480         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5481                         CTLFLAG_RD, &adapter->stats.ptc127,
5482                         "65-127 byte frames transmitted");
5483         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5484                         CTLFLAG_RD, &adapter->stats.ptc255,
5485                         "128-255 byte frames transmitted");
5486         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5487                         CTLFLAG_RD, &adapter->stats.ptc511,
5488                         "256-511 byte frames transmitted");
5489         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5490                         CTLFLAG_RD, &adapter->stats.ptc1023,
5491                         "512-1023 byte frames transmitted");
5492         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5493                         CTLFLAG_RD, &adapter->stats.ptc1522,
5494                         "1024-1522 byte frames transmitted");
5495         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5496                         CTLFLAG_RD, &adapter->stats.tsctc,
5497                         "TSO Contexts Transmitted");
5498         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5499                         CTLFLAG_RD, &adapter->stats.tsctfc,
5500                         "TSO Contexts Failed");
5501
5502
5503         /* Interrupt Stats */
5504
5505         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5506                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5507         int_list = SYSCTL_CHILDREN(int_node);
5508
5509         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5510                         CTLFLAG_RD, &adapter->stats.iac,
5511                         "Interrupt Assertion Count");
5512
5513         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5514                         CTLFLAG_RD, &adapter->stats.icrxptc,
5515                         "Interrupt Cause Rx Pkt Timer Expire Count");
5516
5517         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5518                         CTLFLAG_RD, &adapter->stats.icrxatc,
5519                         "Interrupt Cause Rx Abs Timer Expire Count");
5520
5521         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5522                         CTLFLAG_RD, &adapter->stats.ictxptc,
5523                         "Interrupt Cause Tx Pkt Timer Expire Count");
5524
5525         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5526                         CTLFLAG_RD, &adapter->stats.ictxatc,
5527                         "Interrupt Cause Tx Abs Timer Expire Count");
5528
5529         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5530                         CTLFLAG_RD, &adapter->stats.ictxqec,
5531                         "Interrupt Cause Tx Queue Empty Count");
5532
5533         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5534                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5535                         "Interrupt Cause Tx Queue Min Thresh Count");
5536
5537         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5538                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5539                         "Interrupt Cause Rx Desc Min Thresh Count");
5540
5541         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5542                         CTLFLAG_RD, &adapter->stats.icrxoc,
5543                         "Interrupt Cause Receiver Overrun Count");
5544 }
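     /*
      * The nodes registered above hang off the device's sysctl tree,
      * so the counters are typically readable at runtime with, e.g.,
      * `sysctl dev.em.0.mac_stats` (assuming unit 0).
      */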
5545
5546 /**********************************************************************
5547  *
5548  *  This routine provides a way to dump out the adapter eeprom,
5549  *  often a useful debug/service tool. It dumps only the first
5550  *  32 words; everything that matters lives in that range.
5551  *
5552  **********************************************************************/
5553 static int
5554 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5555 {
5556         struct adapter *adapter = (struct adapter *)arg1;
5557         int error;
5558         int result;
5559
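             /*
              * Writing 1 to this node triggers the dump; the -1 sentinel
              * ensures a plain read returns a harmless value and takes
              * no action.
              */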
5560         result = -1;
5561         error = sysctl_handle_int(oidp, &result, 0, req);
5562
5563         if (error || !req->newptr)
5564                 return (error);
5565
5566         /*
5567          * This value will cause a hex dump of the
5568          * first 32 16-bit words of the EEPROM to
5569          * the screen.
5570          */
5571         if (result == 1)
5572                 em_print_nvm_info(adapter);
5573
5574         return (error);
5575 }
5576
5577 static void
5578 em_print_nvm_info(struct adapter *adapter)
5579 {
5580         u16     eeprom_data;
5581         int     i, j, row = 0;
5582
5583         /* It's a bit crude, but it gets the job done */
5584         printf("\nInterface EEPROM Dump:\n");
5585         printf("Offset\n0x0000  ");
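             /* Eight 16-bit words per row; row labels are byte offsets. */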
5586         for (i = 0, j = 0; i < 32; i++, j++) {
5587                 if (j == 8) { /* Make the offset block */
5588                         j = 0; ++row;
5589                         printf("\n0x00%x0  ", row);
5590                 }
5591                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5592                 printf("%04x ", eeprom_data);
5593         }
5594         printf("\n");
5595 }
5596
5597 static int
5598 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5599 {
5600         struct em_int_delay_info *info;
5601         struct adapter *adapter;
5602         u32 regval;
5603         int error, usecs, ticks;
5604
5605         info = (struct em_int_delay_info *)arg1;
5606         usecs = info->value;
5607         error = sysctl_handle_int(oidp, &usecs, 0, req);
5608         if (error != 0 || req->newptr == NULL)
5609                 return (error);
5610         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5611                 return (EINVAL);
5612         info->value = usecs;
5613         ticks = EM_USECS_TO_TICKS(usecs);
5614
5615         adapter = info->adapter;
5616         
5617         EM_CORE_LOCK(adapter);
5618         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
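             /* The delay value lives in the register's low 16 bits;
              * preserve the upper bits. */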
5619         regval = (regval & ~0xffff) | (ticks & 0xffff);
5620         /* Handle a few special cases. */
5621         switch (info->offset) {
5622         case E1000_RDTR:
5623                 break;
5624         case E1000_TIDV:
5625                 if (ticks == 0) {
5626                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5627                         /* Don't write 0 into the TIDV register. */
5628                         regval++;
5629                 } else
5630                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5631                 break;
5632         }
5633         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5634         EM_CORE_UNLOCK(adapter);
5635         return (0);
5636 }
5637
5638 static void
5639 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5640         const char *description, struct em_int_delay_info *info,
5641         int offset, int value)
5642 {
5643         info->adapter = adapter;
5644         info->offset = offset;
5645         info->value = value;
5646         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5647             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5648             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5649             info, 0, em_sysctl_int_delay, "I", description);
5650 }
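     /*
      * Illustrative use from the attach path (names as used elsewhere
      * in this file), e.g.:
      *
      *     em_add_int_delay_sysctl(adapter, "rx_int_delay",
      *         "receive interrupt delay in usecs", &adapter->rx_int_delay,
      *         E1000_REGISTER(&adapter->hw, E1000_RDTR),
      *         em_rx_int_delay_dflt);
      */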
5651
5652 static void
5653 em_set_sysctl_value(struct adapter *adapter, const char *name,
5654         const char *description, int *limit, int value)
5655 {
5656         *limit = value;
5657         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5658             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5659             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5660 }
5661
5662
5663 /*
5664 ** Set flow control using sysctl:
5665 ** Flow control values:
5666 **      0 - off
5667 **      1 - rx pause
5668 **      2 - tx pause
5669 **      3 - full
5670 */
5671 static int
5672 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5673 {       
5674         int             error;
5675         int             input;
5676         struct adapter  *adapter = (struct adapter *) arg1;
5677         input = adapter->fc;    /* start from the current mode; a
5678                                    static here would leak state across
5678                                    adapters */
5678         error = sysctl_handle_int(oidp, &input, 0, req);
5679     
5680         if ((error) || (req->newptr == NULL))
5681                 return (error);
5682                 
5683         if (input == adapter->fc) /* no change? */
5684                 return (error);
5685
5686         switch (input) {
5687                 case e1000_fc_rx_pause:
5688                 case e1000_fc_tx_pause:
5689                 case e1000_fc_full:
5690                 case e1000_fc_none:
5691                         adapter->hw.fc.requested_mode = input;
5692                         adapter->fc = input;
5693                         break;
5694                 default:
5695                         /* Do nothing */
5696                         return (error);
5697         }
5698
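             /* Apply the new mode to the MAC right away rather than
              * waiting for the next link renegotiation. */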
5699         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5700         e1000_force_mac_fc(&adapter->hw);
5701         return (error);
5702 }
5703
5704 /*
5705 ** Manage Energy Efficient Ethernet:
5706 ** Control values:
5707 **      0 - EEE enabled, 1 - EEE disabled
5708 */
5709 static int
5710 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5711 {
5712         struct adapter *adapter = (struct adapter *) arg1;
5713         int             error, value;
5714
5715         value = adapter->hw.dev_spec.ich8lan.eee_disable;
5716         error = sysctl_handle_int(oidp, &value, 0, req);
5717         if (error || req->newptr == NULL)
5718                 return (error);
5719         EM_CORE_LOCK(adapter);
5720         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
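             /* Reinitialize so the hardware picks up the new EEE setting. */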
5721         em_init_locked(adapter);
5722         EM_CORE_UNLOCK(adapter);
5723         return (0);
5724 }
5725
5726 static int
5727 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5728 {
5729         struct adapter *adapter;
5730         int error;
5731         int result;
5732
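             /* Same write-1-to-trigger convention as em_sysctl_nvm_info(). */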
5733         result = -1;
5734         error = sysctl_handle_int(oidp, &result, 0, req);
5735
5736         if (error || !req->newptr)
5737                 return (error);
5738
5739         if (result == 1) {
5740                 adapter = (struct adapter *)arg1;
5741                 em_print_debug_info(adapter);
5742         }
5743
5744         return (error);
5745 }
5746
5747 /*
5748 ** This routine is meant to be fluid, add whatever is
5749 ** needed for debugging a problem.  -jfv
5750 */
5751 static void
5752 em_print_debug_info(struct adapter *adapter)
5753 {
5754         device_t dev = adapter->dev;
5755         struct tx_ring *txr = adapter->tx_rings;
5756         struct rx_ring *rxr = adapter->rx_rings;
5757
5758         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5759                 printf("Interface is RUNNING ");
5760         else
5761                 printf("Interface is NOT RUNNING ");
5762
5763         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5764                 printf("and INACTIVE\n");
5765         else
5766                 printf("and ACTIVE\n");
5767
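             /* Hardware ring pointers vs. the driver's bookkeeping;
              * only queue 0 is reported here. */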
5768         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5769             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5770             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5771         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5772             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5773             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5774         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5775         device_printf(dev, "TX descriptors avail = %d\n",
5776             txr->tx_avail);
5777         device_printf(dev, "TX descriptor avail failures = %ld\n",
5778             txr->no_desc_avail);
5779         device_printf(dev, "RX discarded packets = %ld\n",
5780             rxr->rx_discarded);
5781         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5782         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5783 }