/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
        /* Intel(R) PRO/1000 Network Connection */
        { 0x8086, E1000_DEV_ID_82571EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82572EI,         PCI_ANY_ID, PCI_ANY_ID, 0},

        { 0x8086, E1000_DEV_ID_82573E,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573E_IAMT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82573L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82583V,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH8_82567V_3,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_C,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_GT,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_IFE_G,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH9_BM,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574L,          PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82574LA,         PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_R_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,   PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_ICH10_D_BM_V,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_M_HV_LC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DM,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH_D_HV_DC,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_LM,      PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_PCH2_LV_V,       PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      em_probe(device_t);
static int      em_attach(device_t);
static int      em_detach(device_t);
static int      em_shutdown(device_t);
static int      em_suspend(device_t);
static int      em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int      em_mq_start(struct ifnet *, struct mbuf *);
static int      em_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     em_qflush(struct ifnet *);
#else
static void     em_start(struct ifnet *);
static void     em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int      em_ioctl(struct ifnet *, u_long, caddr_t);
static void     em_init(void *);
static void     em_init_locked(struct adapter *);
static void     em_stop(void *);
static void     em_media_status(struct ifnet *, struct ifmediareq *);
static int      em_media_change(struct ifnet *);
static void     em_identify_hardware(struct adapter *);
static int      em_allocate_pci_resources(struct adapter *);
static int      em_allocate_legacy(struct adapter *);
static int      em_allocate_msix(struct adapter *);
static int      em_allocate_queues(struct adapter *);
static int      em_setup_msix(struct adapter *);
static void     em_free_pci_resources(struct adapter *);
static void     em_local_timer(void *);
static void     em_reset(struct adapter *);
static int      em_setup_interface(device_t, struct adapter *);

static void     em_setup_transmit_structures(struct adapter *);
static void     em_initialize_transmit_unit(struct adapter *);
static int      em_allocate_transmit_buffers(struct tx_ring *);
static void     em_free_transmit_structures(struct adapter *);
static void     em_free_transmit_buffers(struct tx_ring *);

static int      em_setup_receive_structures(struct adapter *);
static int      em_allocate_receive_buffers(struct rx_ring *);
static void     em_initialize_receive_unit(struct adapter *);
static void     em_free_receive_structures(struct adapter *);
static void     em_free_receive_buffers(struct rx_ring *);

static void     em_enable_intr(struct adapter *);
static void     em_disable_intr(struct adapter *);
static void     em_update_stats_counters(struct adapter *);
static void     em_add_hw_stats(struct adapter *adapter);
static void     em_txeof(struct tx_ring *);
static bool     em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int      em_fixup_rx(struct rx_ring *);
#endif
static void     em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void     em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
                    struct ip *, u32 *, u32 *);
static void     em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
                    struct tcphdr *, u32 *, u32 *);
static void     em_set_promisc(struct adapter *);
static void     em_disable_promisc(struct adapter *);
static void     em_set_multi(struct adapter *);
static void     em_update_link_status(struct adapter *);
static void     em_refresh_mbufs(struct rx_ring *, int);
static void     em_register_vlan(void *, struct ifnet *, u16);
static void     em_unregister_vlan(void *, struct ifnet *, u16);
static void     em_setup_vlan_hw_support(struct adapter *);
static int      em_xmit(struct tx_ring *, struct mbuf **);
static int      em_dma_malloc(struct adapter *, bus_size_t,
                    struct em_dma_alloc *, int);
static void     em_dma_free(struct adapter *, struct em_dma_alloc *);
static int      em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void     em_print_nvm_info(struct adapter *);
static int      em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void     em_print_debug_info(struct adapter *);
static int      em_is_valid_ether_addr(u8 *);
static int      em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void     em_add_int_delay_sysctl(struct adapter *, const char *,
                    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void     em_init_manageability(struct adapter *);
static void     em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void     em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int      em_enable_phy_wakeup(struct adapter *);
static void     em_led_func(void *, int);
static void     em_disable_aspm(struct adapter *);

static int      em_irq_fast(void *);

/* MSIX handlers */
static void     em_msix_tx(void *);
static void     em_msix_rx(void *);
static void     em_msix_link(void *);
static void     em_handle_tx(void *context, int pending);
static void     em_handle_rx(void *context, int pending);
static void     em_handle_link(void *context, int pending);

static void     em_set_sysctl_value(struct adapter *, const char *,
                    const char *, int *, int);
static int      em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, em_probe),
        DEVMETHOD(device_attach, em_attach),
        DEVMETHOD(device_detach, em_detach),
        DEVMETHOD(device_shutdown, em_shutdown),
        DEVMETHOD(device_suspend, em_suspend),
        DEVMETHOD(device_resume, em_resume),
        {0, 0}
};

static driver_t em_driver = {
        "em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
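
/*
 * For example, the module can be loaded at boot by adding
 *      if_em_load="YES"
 * to /boot/loader.conf, or at runtime with "kldload if_em".
 */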

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN                       66
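
/*
 * The adapter's interrupt-delay timers count in units of 1.024 usecs,
 * so these macros round-convert between timer ticks and microseconds.
 * For example, EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66,
 * and EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64.
 */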

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO        0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP       /* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded for an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
        char            adapter_name[60];
        u16             pci_vendor_id = 0;
        u16             pci_device_id = 0;
        u16             pci_subvendor_id = 0;
        u16             pci_subdevice_id = 0;
        em_vendor_info_t *ent;

        INIT_DEBUGOUT("em_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != EM_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = em_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&

                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&

                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        sprintf(adapter_name, "%s %s",
                                em_strings[ent->index],
                                em_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
        struct adapter  *adapter;
        struct e1000_hw *hw;
        int             error = 0;

        INIT_DEBUGOUT("em_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        hw = &adapter->hw;
        EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_nvm_info, "I", "NVM Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            em_set_flowcntl, "I", "Flow Control");

        callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

        /* Determine hardware and mac info */
        em_identify_hardware(adapter);

        /* Setup PCI resources */
        if (em_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /*
        ** For ICH8 and family we need to
        ** map the flash memory, and this
        ** must happen after the MAC is
        ** identified
        */
        if ((hw->mac.type == e1000_ich8lan) ||
            (hw->mac.type == e1000_ich9lan) ||
            (hw->mac.type == e1000_ich10lan) ||
            (hw->mac.type == e1000_pchlan) ||
            (hw->mac.type == e1000_pch2lan)) {
                int rid = EM_BAR_TYPE_FLASH;
                adapter->flash = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
                if (adapter->flash == NULL) {
                        device_printf(dev, "Mapping of Flash failed\n");
                        error = ENXIO;
                        goto err_pci;
                }
                /* This is used in the shared code */
                hw->flash_address = (u8 *)adapter->flash;
                adapter->osdep.flash_bus_space_tag =
                    rman_get_bustag(adapter->flash);
                adapter->osdep.flash_bus_space_handle =
                    rman_get_bushandle(adapter->flash);
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(hw);

        /* Set up some sysctls for the tunable interrupt delays */
        em_add_int_delay_sysctl(adapter, "rx_int_delay",
            "receive interrupt delay in usecs", &adapter->rx_int_delay,
            E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_int_delay",
            "transmit interrupt delay in usecs", &adapter->tx_int_delay,
            E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
            "receive interrupt delay limit in usecs",
            &adapter->rx_abs_int_delay,
            E1000_REGISTER(hw, E1000_RADV),
            em_rx_abs_int_delay_dflt);
        em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
            "transmit interrupt delay limit in usecs",
            &adapter->tx_abs_int_delay,
            E1000_REGISTER(hw, E1000_TADV),
            em_tx_abs_int_delay_dflt);

        /* Sysctl for limiting the amount of work done in the taskqueue */
        em_set_sysctl_value(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            em_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors. The
         * ring sizes must not exceed the hardware maximum, and each ring
         * size in bytes must be a multiple of EM_DBA_ALIGN.
         */
        if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    EM_DEFAULT_TXD, em_txd);
                adapter->num_tx_desc = EM_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = em_txd;

        if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
            (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    EM_DEFAULT_RXD, em_rxd);
                adapter->num_rx_desc = EM_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = em_rxd;

        hw->mac.autoneg = DO_AUTO_NEG;
        hw->phy.autoneg_wait_to_complete = FALSE;
        hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (hw->phy.media_type == e1000_media_type_copper) {
                hw->phy.mdix = AUTO_ALL_MODES;
                hw->phy.disable_polarity_correction = FALSE;
                hw->phy.ms_type = EM_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard Ethernet-sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
         * This controls when hardware reports transmit completion
         * status.
         */
        hw->mac.report_tx_early = 1;

        /*
        ** Get queue/ring memory
        */
        if (em_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /* Allocate multicast array memory. */
        adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
            MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
        if (adapter->mta == NULL) {
                device_printf(dev, "Can not allocate multicast setup array\n");
                error = ENOMEM;
                goto err_late;
        }

        /* Check SOL/IDER usage */
        if (e1000_check_reset_block(hw))
                device_printf(dev, "PHY reset is blocked"
                    " due to SOL/IDER session.\n");

        /* Sysctl for setting Energy Efficient Ethernet */
        hw->dev_spec.ich8lan.eee_disable = eee_setting;
        SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
            OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
            adapter, 0, em_sysctl_eee, "I",
            "Disable Energy Efficient Ethernet");

        /*
        ** Start from a known state; this is
        ** important for reading the NVM and
        ** MAC address from it.
        */
        e1000_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in a sleep state; call it again.
                ** If it fails a second time it is a real issue.
                */
                if (e1000_validate_nvm_checksum(hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /* Copy the permanent MAC address out of the EEPROM */
        if (e1000_read_mac_addr(hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }

        if (!em_is_valid_ether_addr(hw->mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        **  Do interrupt configuration
        */
        if (adapter->msix > 1) /* Do MSIX */
                error = em_allocate_msix(adapter);
        else  /* MSI or Legacy */
                error = em_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /*
         * Get Wake-on-Lan and Management info for later use
         */
        em_get_wakeup(dev);

        /* Setup OS specific network interface */
        if (em_setup_interface(dev, adapter) != 0)
                goto err_late;

        em_reset(adapter);

        /* Initialize statistics */
        em_update_stats_counters(adapter);

        hw->mac.get_link_status = 1;
        em_update_link_status(adapter);

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
            em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
            em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        em_add_hw_stats(adapter);

        /* Non-AMT based hardware can now take control from firmware */
        if (adapter->has_manage && !adapter->has_amt)
                em_get_hw_control(adapter);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

        adapter->led_dev = led_create(em_led_func, adapter,
            device_get_nameunit(dev));
#ifdef DEV_NETMAP
        em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

        INIT_DEBUGOUT("em_attach: end");

        return (0);

err_late:
        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);
        em_release_hw_control(adapter);
        if (adapter->ifp != NULL)
                if_free(adapter->ifp);
err_pci:
        em_free_pci_resources(adapter);
        free(adapter->mta, M_DEVBUF);
        EM_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);
        struct ifnet    *ifp = adapter->ifp;

        INIT_DEBUGOUT("em_detach: begin");

        /* Make sure VLANs are not using the driver */
        if (adapter->ifp->if_vlantrunk != NULL) {
                device_printf(dev,"Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(ifp);
#endif

        if (adapter->led_dev != NULL)
                led_destroy(adapter->led_dev);

        EM_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        em_stop(adapter);
        EM_CORE_UNLOCK(adapter);
        EM_CORE_LOCK_DESTROY(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);
        callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
        netmap_detach(ifp);
#endif /* DEV_NETMAP */

        em_free_pci_resources(adapter);
        bus_generic_detach(dev);
        if_free(ifp);

        em_free_transmit_structures(adapter);
        em_free_receive_structures(adapter);

        em_release_hw_control(adapter);
        free(adapter->mta, M_DEVBUF);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
        return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        EM_CORE_LOCK(adapter);

        em_release_manageability(adapter);
        em_release_hw_control(adapter);
        em_enable_wakeup(dev);

        EM_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct tx_ring  *txr = adapter->tx_rings;
        struct ifnet *ifp = adapter->ifp;

        EM_CORE_LOCK(adapter);
        if (adapter->hw.mac.type == e1000_pch2lan)
                e1000_resume_workarounds_pchlan(&adapter->hw);
        em_init_locked(adapter);
        em_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
                for (int i = 0; i < adapter->num_queues; i++, txr++) {
                        EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
                        if (!drbr_empty(ifp, txr->br))
                                em_mq_start_locked(ifp, txr, NULL);
#else
                        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
                                em_start_locked(ifp, txr);
#endif
                        EM_TX_UNLOCK(txr);
                }
        }
        EM_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the request
 *  rather than do an immediate send. It is this queueing ability,
 *  rather than multiple TX queues alone, that is the advantage here.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq = 0;

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = em_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                ifp->if_obytes += next->m_pkthdr.len;
                if (next->m_flags & M_MCAST)
                        ifp->if_omcasts++;
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enq > 0) {
                /* Set the watchdog */
                txr->queue_status = EM_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < EM_MAX_SCATTER)
                em_txeof(txr);
        if (txr->tx_avail < EM_MAX_SCATTER)
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
        return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        int             error;

        if (EM_TX_TRYLOCK(txr)) {
                error = em_mq_start_locked(ifp, txr, m);
                EM_TX_UNLOCK(txr);
        } else
                error = drbr_enqueue(ifp, txr->br, m);

        return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                EM_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                EM_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        EM_TX_LOCK_ASSERT(txr);

        if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
            IFF_DRV_RUNNING)
                return;

        if (!adapter->link_active)
                return;

        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
                /* Call cleanup if number of TX descriptors low */
                if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
                        em_txeof(txr);
                if (txr->tx_avail < EM_MAX_SCATTER) {
                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                        break;
                }
                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (em_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set timeout in case hardware has problems transmitting. */
                txr->watchdog_time = ticks;
                txr->queue_status = EM_QUEUE_WORKING;
        }

        return;
}

static void
em_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                EM_TX_LOCK(txr);
                em_start_locked(ifp, txr);
                EM_TX_UNLOCK(txr);
        }
        return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
        struct ifaddr   *ifa = (struct ifaddr *)data;
#endif
        bool            avoid_reset = FALSE;
        int             error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
                                em_init(adapter);
#ifdef INET
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
#endif
                } else
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                EM_CORE_LOCK(adapter);
                switch (adapter->hw.mac.type) {
                case e1000_82571:
                case e1000_82572:
                case e1000_ich9lan:
                case e1000_ich10lan:
                case e1000_pch2lan:
                case e1000_82574:
                case e1000_82583:
                case e1000_80003es2lan: /* 9K Jumbo Frame size */
                        max_frame_size = 9234;
                        break;
                case e1000_pchlan:
                        max_frame_size = 4096;
                        break;
                        /* Adapters that do not support jumbo frames */
                case e1000_ich8lan:
                        max_frame_size = ETHER_MAX_LEN;
                        break;
                default:
                        max_frame_size = MAX_JUMBO_FRAME_SIZE;
                }
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        EM_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                em_init_locked(adapter);
                EM_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd:\
                    SIOCSIFFLAGS (Set Interface Flags)");
                EM_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        em_disable_promisc(adapter);
                                        em_set_promisc(adapter);
                                }
                        } else
                                em_init_locked(adapter);
                } else
                        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                                em_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                EM_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                        EM_CORE_LOCK(adapter);
                        em_disable_intr(adapter);
                        em_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                em_enable_intr(adapter);
                        EM_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                EM_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        EM_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                EM_CORE_UNLOCK(adapter);
                /* falls thru */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: \
                    SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(em_poll, ifp);
                                if (error)
                                        return (error);
                                EM_CORE_LOCK(adapter);
                                em_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                EM_CORE_LOCK(adapter);
                                em_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                EM_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWFILTER) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
                        reinit = 1;
                }
                if (mask & IFCAP_VLAN_HWTSO) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
                        reinit = 1;
                }
                if ((mask & IFCAP_WOL) &&
                    (ifp->if_capabilities & IFCAP_WOL) != 0) {
                        if (mask & IFCAP_WOL_MCAST)
                                ifp->if_capenable ^= IFCAP_WOL_MCAST;
                        if (mask & IFCAP_WOL_MAGIC)
                                ifp->if_capenable ^= IFCAP_WOL_MAGIC;
                }
                if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
                        em_init(adapter);
                VLAN_CAPABILITIES(ifp);
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("em_init: begin");

        EM_CORE_LOCK_ASSERT(adapter);

        em_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        /*
         * With the 82571 adapter, RAR[0] may be overwritten
         * when the other port is reset. We make a duplicate
         * in RAR[14] for that eventuality, which assures
         * the interface continues to function.
         */
        if (adapter->hw.mac.type == e1000_82571) {
                e1000_set_laa_state_82571(&adapter->hw, TRUE);
                e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
                    E1000_RAR_ENTRIES - 1);
        }

        /* Initialize the hardware */
        em_reset(adapter);
        em_update_link_status(adapter);

        /* Setup VLAN support, basic and offload if available */
        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM)
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;

        /* Configure for OS presence */
        em_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        em_setup_transmit_structures(adapter);
        em_initialize_transmit_unit(adapter);

        /* Setup Multicast table */
        em_set_multi(adapter);

        /*
        ** Figure out the desired mbuf
        ** pool for jumbo frames
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MJUM9BYTES;
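
        /*
         * For reference: MCLBYTES is the standard 2K mbuf cluster,
         * MJUMPAGESIZE is a page-sized (typically 4K) cluster, and
         * MJUM9BYTES is a 9K cluster used for the larger jumbo sizes.
         */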
1329
1330         /* Prepare receive descriptors and buffers */
1331         if (em_setup_receive_structures(adapter)) {
1332                 device_printf(dev, "Could not setup receive structures\n");
1333                 em_stop(adapter);
1334                 return;
1335         }
1336         em_initialize_receive_unit(adapter);
1337
1338         /* Use real VLAN Filter support? */
1339         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1340                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1341                         /* Use real VLAN Filter support */
1342                         em_setup_vlan_hw_support(adapter);
1343                 else {
1344                         u32 ctrl;
1345                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1346                         ctrl |= E1000_CTRL_VME;
1347                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1348                 }
1349         }
1350
1351         /* Don't lose promiscuous settings */
1352         em_set_promisc(adapter);
1353
1354         /* Set the interface as ACTIVE */
1355         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1356         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1357
1358         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1359         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1360
1361         /* MSI/X configuration for 82574 */
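        /*
         * (PBA_CLR and the IVAR register are specific to the 82574.
         * IVAR holds one 4-bit entry per interrupt cause -- bit 3 is
         * the valid bit, bits 2:0 the MSIX vector -- assembled into
         * adapter->ivars by em_allocate_msix() below.)
         */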
1362         if (adapter->hw.mac.type == e1000_82574) {
1363                 int tmp;
1364                 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1365                 tmp |= E1000_CTRL_EXT_PBA_CLR;
1366                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1367                 /* Set the IVAR - interrupt vector routing. */
1368                 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1369         }
1370
1371 #ifdef DEVICE_POLLING
1372         /*
1373          * Only enable interrupts if we are not polling; make sure
1374          * they are off otherwise.
1375          */
1376         if (ifp->if_capenable & IFCAP_POLLING)
1377                 em_disable_intr(adapter);
1378         else
1379 #endif /* DEVICE_POLLING */
1380                 em_enable_intr(adapter);
1381
1382         /* AMT based hardware can now take control from firmware */
1383         if (adapter->has_manage && adapter->has_amt)
1384                 em_get_hw_control(adapter);
1385 }
1386
1387 static void
1388 em_init(void *arg)
1389 {
1390         struct adapter *adapter = arg;
1391
1392         EM_CORE_LOCK(adapter);
1393         em_init_locked(adapter);
1394         EM_CORE_UNLOCK(adapter);
1395 }
1396
1397
1398 #ifdef DEVICE_POLLING
1399 /*********************************************************************
1400  *
1401  *  Legacy polling routine: note this only works with single queue
1402  *
1403  *********************************************************************/
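/*
 * (With DEVICE_POLLING the stack calls this periodically instead of
 * relying on interrupts: `count` bounds how many RX packets may be
 * processed per call, and the return value reports how many were.)
 */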
1404 static int
1405 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1406 {
1407         struct adapter *adapter = ifp->if_softc;
1408         struct tx_ring  *txr = adapter->tx_rings;
1409         struct rx_ring  *rxr = adapter->rx_rings;
1410         u32             reg_icr;
1411         int             rx_done;
1412
1413         EM_CORE_LOCK(adapter);
1414         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1415                 EM_CORE_UNLOCK(adapter);
1416                 return (0);
1417         }
1418
1419         if (cmd == POLL_AND_CHECK_STATUS) {
1420                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1421                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1422                         callout_stop(&adapter->timer);
1423                         adapter->hw.mac.get_link_status = 1;
1424                         em_update_link_status(adapter);
1425                         callout_reset(&adapter->timer, hz,
1426                             em_local_timer, adapter);
1427                 }
1428         }
1429         EM_CORE_UNLOCK(adapter);
1430
1431         em_rxeof(rxr, count, &rx_done);
1432
1433         EM_TX_LOCK(txr);
1434         em_txeof(txr);
1435 #ifdef EM_MULTIQUEUE
1436         if (!drbr_empty(ifp, txr->br))
1437                 em_mq_start_locked(ifp, txr, NULL);
1438 #else
1439         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1440                 em_start_locked(ifp, txr);
1441 #endif
1442         EM_TX_UNLOCK(txr);
1443
1444         return (rx_done);
1445 }
1446 #endif /* DEVICE_POLLING */
1447
1448
1449 /*********************************************************************
1450  *
1451  *  Fast Legacy/MSI Combined Interrupt Service routine  
1452  *
1453  *********************************************************************/
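/*
 * (Note: this is registered as an interrupt filter, so it runs in
 * primary interrupt context and must not sleep; the real RX/TX work
 * is deferred to em_handle_que() via the taskqueue.  FILTER_STRAY
 * tells the kernel the interrupt was not ours.)
 */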
1454 static int
1455 em_irq_fast(void *arg)
1456 {
1457         struct adapter  *adapter = arg;
1458         struct ifnet    *ifp;
1459         u32             reg_icr;
1460
1461         ifp = adapter->ifp;
1462
1463         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1464
1465         /* Hot eject?  */
1466         if (reg_icr == 0xffffffff)
1467                 return FILTER_STRAY;
1468
1469         /* Definitely not our interrupt.  */
1470         if (reg_icr == 0x0)
1471                 return FILTER_STRAY;
1472
1473         /*
1474          * Starting with the 82571 chip, bit 31 should be used to
1475          * determine whether the interrupt belongs to us.
1476          */
1477         if (adapter->hw.mac.type >= e1000_82571 &&
1478             (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1479                 return FILTER_STRAY;
1480
1481         em_disable_intr(adapter);
1482         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1483
1484         /* Link status change */
1485         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1486                 adapter->hw.mac.get_link_status = 1;
1487                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1488         }
1489
1490         if (reg_icr & E1000_ICR_RXO)
1491                 adapter->rx_overruns++;
1492         return FILTER_HANDLED;
1493 }
1494
1495 /* Combined RX/TX handler, used by Legacy and MSI */
1496 static void
1497 em_handle_que(void *context, int pending)
1498 {
1499         struct adapter  *adapter = context;
1500         struct ifnet    *ifp = adapter->ifp;
1501         struct tx_ring  *txr = adapter->tx_rings;
1502         struct rx_ring  *rxr = adapter->rx_rings;
1503
1504
1505         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1506                 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1507                 EM_TX_LOCK(txr);
1508                 em_txeof(txr);
1509 #ifdef EM_MULTIQUEUE
1510                 if (!drbr_empty(ifp, txr->br))
1511                         em_mq_start_locked(ifp, txr, NULL);
1512 #else
1513                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1514                         em_start_locked(ifp, txr);
1515 #endif
1516                 EM_TX_UNLOCK(txr);
1517                 if (more) {
1518                         taskqueue_enqueue(adapter->tq, &adapter->que_task);
1519                         return;
1520                 }
1521         }
1522
1523         em_enable_intr(adapter);
1524         return;
1525 }
1526
1527
1528 /*********************************************************************
1529  *
1530  *  MSIX Interrupt Service Routines
1531  *
1532  **********************************************************************/
1533 static void
1534 em_msix_tx(void *arg)
1535 {
1536         struct tx_ring *txr = arg;
1537         struct adapter *adapter = txr->adapter;
1538         struct ifnet    *ifp = adapter->ifp;
1539
1540         ++txr->tx_irq;
1541         EM_TX_LOCK(txr);
1542         em_txeof(txr);
1543 #ifdef EM_MULTIQUEUE
1544         if (!drbr_empty(ifp, txr->br))
1545                 em_mq_start_locked(ifp, txr, NULL);
1546 #else
1547         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1548                 em_start_locked(ifp, txr);
1549 #endif
1550         /* Reenable this interrupt */
1551         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1552         EM_TX_UNLOCK(txr);
1553         return;
1554 }
1555
1556 /*********************************************************************
1557  *
1558  *  MSIX RX Interrupt Service routine
1559  *
1560  **********************************************************************/
1561
1562 static void
1563 em_msix_rx(void *arg)
1564 {
1565         struct rx_ring  *rxr = arg;
1566         struct adapter  *adapter = rxr->adapter;
1567         bool            more;
1568
1569         ++rxr->rx_irq;
1570         if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1571                 return;
1572         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1573         if (more)
1574                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1575         else
1576                 /* Reenable this interrupt */
1577                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1578         return;
1579 }
1580
1581 /*********************************************************************
1582  *
1583  *  MSIX Link Fast Interrupt Service routine
1584  *
1585  **********************************************************************/
1586 static void
1587 em_msix_link(void *arg)
1588 {
1589         struct adapter  *adapter = arg;
1590         u32             reg_icr;
1591
1592         ++adapter->link_irq;
1593         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1594
1595         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1596                 adapter->hw.mac.get_link_status = 1;
1597                 em_handle_link(adapter, 0);
1598         } else
1599                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1600                     EM_MSIX_LINK | E1000_IMS_LSC);
1601         return;
1602 }
1603
1604 static void
1605 em_handle_rx(void *context, int pending)
1606 {
1607         struct rx_ring  *rxr = context;
1608         struct adapter  *adapter = rxr->adapter;
1609         bool            more;
1610
1611         more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1612         if (more)
1613                 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1614         else
1615                 /* Reenable this interrupt */
1616                 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1617 }
1618
1619 static void
1620 em_handle_tx(void *context, int pending)
1621 {
1622         struct tx_ring  *txr = context;
1623         struct adapter  *adapter = txr->adapter;
1624         struct ifnet    *ifp = adapter->ifp;
1625
1626         EM_TX_LOCK(txr);
1627         em_txeof(txr);
1628 #ifdef EM_MULTIQUEUE
1629         if (!drbr_empty(ifp, txr->br))
1630                 em_mq_start_locked(ifp, txr, NULL);
1631 #else
1632         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1633                 em_start_locked(ifp, txr);
1634 #endif
1635         E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1636         EM_TX_UNLOCK(txr);
1637 }
1638
1639 static void
1640 em_handle_link(void *context, int pending)
1641 {
1642         struct adapter  *adapter = context;
1643         struct tx_ring  *txr = adapter->tx_rings;
1644         struct ifnet *ifp = adapter->ifp;
1645
1646         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1647                 return;
1648
1649         EM_CORE_LOCK(adapter);
1650         callout_stop(&adapter->timer);
1651         em_update_link_status(adapter);
1652         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1653         E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1654             EM_MSIX_LINK | E1000_IMS_LSC);
1655         if (adapter->link_active) {
1656                 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1657                         EM_TX_LOCK(txr);
1658 #ifdef EM_MULTIQUEUE
1659                         if (!drbr_empty(ifp, txr->br))
1660                                 em_mq_start_locked(ifp, txr, NULL);
1661 #else
1662                         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1663                                 em_start_locked(ifp, txr);
1664 #endif
1665                         EM_TX_UNLOCK(txr);
1666                 }
1667         }
1668         EM_CORE_UNLOCK(adapter);
1669 }
1670
1671
1672 /*********************************************************************
1673  *
1674  *  Media Ioctl callback
1675  *
1676  *  This routine is called whenever the user queries the status of
1677  *  the interface using ifconfig.
1678  *
1679  **********************************************************************/
1680 static void
1681 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1682 {
1683         struct adapter *adapter = ifp->if_softc;
1684         u_char fiber_type = IFM_1000_SX;
1685
1686         INIT_DEBUGOUT("em_media_status: begin");
1687
1688         EM_CORE_LOCK(adapter);
1689         em_update_link_status(adapter);
1690
1691         ifmr->ifm_status = IFM_AVALID;
1692         ifmr->ifm_active = IFM_ETHER;
1693
1694         if (!adapter->link_active) {
1695                 EM_CORE_UNLOCK(adapter);
1696                 return;
1697         }
1698
1699         ifmr->ifm_status |= IFM_ACTIVE;
1700
1701         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1702             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1703                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1704         } else {
1705                 switch (adapter->link_speed) {
1706                 case 10:
1707                         ifmr->ifm_active |= IFM_10_T;
1708                         break;
1709                 case 100:
1710                         ifmr->ifm_active |= IFM_100_TX;
1711                         break;
1712                 case 1000:
1713                         ifmr->ifm_active |= IFM_1000_T;
1714                         break;
1715                 }
1716                 if (adapter->link_duplex == FULL_DUPLEX)
1717                         ifmr->ifm_active |= IFM_FDX;
1718                 else
1719                         ifmr->ifm_active |= IFM_HDX;
1720         }
1721         EM_CORE_UNLOCK(adapter);
1722 }
1723
1724 /*********************************************************************
1725  *
1726  *  Media Ioctl callback
1727  *
1728  *  This routine is called when the user changes speed/duplex using
1729  *  media/mediaopt options with ifconfig.
1730  *
1731  **********************************************************************/
1732 static int
1733 em_media_change(struct ifnet *ifp)
1734 {
1735         struct adapter *adapter = ifp->if_softc;
1736         struct ifmedia  *ifm = &adapter->media;
1737
1738         INIT_DEBUGOUT("em_media_change: begin");
1739
1740         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1741                 return (EINVAL);
1742
1743         EM_CORE_LOCK(adapter);
1744         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1745         case IFM_AUTO:
1746                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1747                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1748                 break;
1749         case IFM_1000_LX:
1750         case IFM_1000_SX:
1751         case IFM_1000_T:
1752                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1753                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1754                 break;
1755         case IFM_100_TX:
1756                 adapter->hw.mac.autoneg = FALSE;
1757                 adapter->hw.phy.autoneg_advertised = 0;
1758                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1759                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1760                 else
1761                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1762                 break;
1763         case IFM_10_T:
1764                 adapter->hw.mac.autoneg = FALSE;
1765                 adapter->hw.phy.autoneg_advertised = 0;
1766                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1767                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1768                 else
1769                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1770                 break;
1771         default:
1772                 device_printf(adapter->dev, "Unsupported media type\n");
1773         }
1774
1775         em_init_locked(adapter);
1776         EM_CORE_UNLOCK(adapter);
1777
1778         return (0);
1779 }
1780
1781 /*********************************************************************
1782  *
1783  *  This routine maps the mbufs to tx descriptors.
1784  *
1785  *  return 0 on success, positive on failure
1786  **********************************************************************/
1787
1788 static int
1789 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1790 {
1791         struct adapter          *adapter = txr->adapter;
1792         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1793         bus_dmamap_t            map;
1794         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1795         struct e1000_tx_desc    *ctxd = NULL;
1796         struct mbuf             *m_head;
1797         struct ether_header     *eh;
1798         struct ip               *ip = NULL;
1799         struct tcphdr           *tp = NULL;
1800         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1801         int                     ip_off, poff;
1802         int                     nsegs, i, j, first, last = 0;
1803         int                     error, do_tso, tso_desc = 0, remap = 1;
1804
1805 retry:
1806         m_head = *m_headp;
1807         txd_upper = txd_lower = txd_used = txd_saved = 0;
1808         do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1809         ip_off = poff = 0;
1810
1811         /*
1812          * Intel recommends entire IP/TCP header length reside in a single
1813          * buffer. If multiple descriptors are used to describe the IP and
1814          * TCP header, each descriptor should describe one or more
1815          * complete headers; descriptors referencing only parts of headers
1816          * are not supported. If all layer headers are not coalesced into
1817          * a single buffer, each buffer should not cross a 4KB boundary,
1818          * or be larger than the maximum read request size.
1819          * The controller also requires modifying the IP/TCP header to
1820          * make TSO work, so we first get a writable mbuf chain, then
1821          * coalesce the ethernet/IP/TCP headers into a single buffer to
1822          * meet the controller's requirement. This also simplifies
1823          * IP/TCP/UDP checksum offloading, which has similar restrictions.
1824          */
1825         if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1826                 if (do_tso || (m_head->m_next != NULL && 
1827                     m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1828                         if (M_WRITABLE(*m_headp) == 0) {
1829                                 m_head = m_dup(*m_headp, M_DONTWAIT);
1830                                 m_freem(*m_headp);
1831                                 if (m_head == NULL) {
1832                                         *m_headp = NULL;
1833                                         return (ENOBUFS);
1834                                 }
1835                                 *m_headp = m_head;
1836                         }
1837                 }
1838                 /*
1839                  * XXX
1840                  * Assume IPv4, we don't have TSO/checksum offload support
1841                  * for IPv6 yet.
1842                  */
1843                 ip_off = sizeof(struct ether_header);
1844                 m_head = m_pullup(m_head, ip_off);
1845                 if (m_head == NULL) {
1846                         *m_headp = NULL;
1847                         return (ENOBUFS);
1848                 }
1849                 eh = mtod(m_head, struct ether_header *);
1850                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1851                         ip_off = sizeof(struct ether_vlan_header);
1852                         m_head = m_pullup(m_head, ip_off);
1853                         if (m_head == NULL) {
1854                                 *m_headp = NULL;
1855                                 return (ENOBUFS);
1856                         }
1857                 }
1858                 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1859                 if (m_head == NULL) {
1860                         *m_headp = NULL;
1861                         return (ENOBUFS);
1862                 }
1863                 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1864                 poff = ip_off + (ip->ip_hl << 2);
1865                 if (do_tso) {
1866                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1867                         if (m_head == NULL) {
1868                                 *m_headp = NULL;
1869                                 return (ENOBUFS);
1870                         }
1871                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1872                         /*
1873                          * TSO workaround:
1874                          *   pull 4 more bytes of data into it.
1875                          */
1876                         m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1877                         if (m_head == NULL) {
1878                                 *m_headp = NULL;
1879                                 return (ENOBUFS);
1880                         }
1881                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1882                         ip->ip_len = 0;
1883                         ip->ip_sum = 0;
1884                         /*
1885                          * The pseudo TCP checksum does not include TCP payload
1886                          * length, so the driver should recompute the checksum
1887                          * here to match what the hardware expects to see, per
1888                          * Microsoft's Large Send specification.
1889                          */
1890                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1891                         tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1892                             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1893                 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1894                         m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1895                         if (m_head == NULL) {
1896                                 *m_headp = NULL;
1897                                 return (ENOBUFS);
1898                         }
1899                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1900                         m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1901                         if (m_head == NULL) {
1902                                 *m_headp = NULL;
1903                                 return (ENOBUFS);
1904                         }
1905                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1906                         tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1907                 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1908                         m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1909                         if (m_head == NULL) {
1910                                 *m_headp = NULL;
1911                                 return (ENOBUFS);
1912                         }
1913                         ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1914                 }
1915                 *m_headp = m_head;
1916         }
1917
1918         /*
1919          * Map the packet for DMA
1920          *
1921          * Capture the first descriptor index,
1922          * this descriptor will have the index
1923          * of the EOP which is the only one that
1924          * now gets a DONE bit writeback.
1925          */
1926         first = txr->next_avail_desc;
1927         tx_buffer = &txr->tx_buffers[first];
1928         tx_buffer_mapped = tx_buffer;
1929         map = tx_buffer->map;
1930
1931         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1932             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1933
1934         /*
1935          * There are two types of errors we can (try) to handle:
1936          * - EFBIG means the mbuf chain was too long and bus_dma ran
1937          *   out of segments.  Defragment the mbuf chain and try again.
1938          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1939          *   at this point in time.  Defer sending and try again later.
1940          * All other errors, in particular EINVAL, are fatal and prevent the
1941          * mbuf chain from ever going through.  Drop it and report error.
1942          */
1943         if (error == EFBIG && remap) {
1944                 struct mbuf *m;
1945
1946                 m = m_defrag(*m_headp, M_DONTWAIT);
1947                 if (m == NULL) {
1948                         adapter->mbuf_alloc_failed++;
1949                         m_freem(*m_headp);
1950                         *m_headp = NULL;
1951                         return (ENOBUFS);
1952                 }
1953                 *m_headp = m;
1954
1955                 /* Try it again, but only once */
1956                 remap = 0;
1957                 goto retry;
1958         } else if (error == ENOMEM) {
1959                 adapter->no_tx_dma_setup++;
1960                 return (error);
1961         } else if (error != 0) {
1962                 adapter->no_tx_dma_setup++;
1963                 m_freem(*m_headp);
1964                 *m_headp = NULL;
1965                 return (error);
1966         }
1967
1968         /*
1969          * TSO Hardware workaround, if this packet is not
1970          * TSO, and is only a single descriptor long, and
1971          * it follows a TSO burst, then we need to add a
1972          * sentinel descriptor to prevent premature writeback.
1973          */
1974         if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1975                 if (nsegs == 1)
1976                         tso_desc = TRUE;
1977                 txr->tx_tso = FALSE;
1978         }
1979
1980         if (nsegs > (txr->tx_avail - 2)) {
1981                 txr->no_desc_avail++;
1982                 bus_dmamap_unload(txr->txtag, map);
1983                 return (ENOBUFS);
1984         }
1985         m_head = *m_headp;
1986
1987         /* Do hardware assists */
1988         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1989                 em_tso_setup(txr, m_head, ip_off, ip, tp,
1990                     &txd_upper, &txd_lower);
1991                 /* we need to make a final sentinel transmit desc */
1992                 tso_desc = TRUE;
1993         } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1994                 em_transmit_checksum_setup(txr, m_head,
1995                     ip_off, ip, &txd_upper, &txd_lower);
1996
1997         if (m_head->m_flags & M_VLANTAG) {
1998                 /* Set the vlan id. */
1999                 txd_upper |=
2000                     (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2001                 /* Tell hardware to add tag */
2002                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2003         }
2004
2005         i = txr->next_avail_desc;
2006
2007         /* Set up our transmit descriptors */
2008         for (j = 0; j < nsegs; j++) {
2009                 bus_size_t seg_len;
2010                 bus_addr_t seg_addr;
2011
2012                 tx_buffer = &txr->tx_buffers[i];
2013                 ctxd = &txr->tx_base[i];
2014                 seg_addr = segs[j].ds_addr;
2015                 seg_len  = segs[j].ds_len;
2016                 /*
2017                 ** TSO Workaround:
2018                 ** If this is the last descriptor, we want to
2019                 ** split it so we have a small final sentinel
2020                 */
2021                 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2022                         seg_len -= 4;
2023                         ctxd->buffer_addr = htole64(seg_addr);
2024                         ctxd->lower.data = htole32(
2025                             adapter->txd_cmd | txd_lower | seg_len);
2026                         ctxd->upper.data =
2027                             htole32(txd_upper);
2028                         if (++i == adapter->num_tx_desc)
2029                                 i = 0;
2030                         /* Now make the sentinel */     
2031                         ++txd_used; /* using an extra txd */
2032                         ctxd = &txr->tx_base[i];
2033                         tx_buffer = &txr->tx_buffers[i];
2034                         ctxd->buffer_addr =
2035                             htole64(seg_addr + seg_len);
2036                         ctxd->lower.data = htole32(
2037                             adapter->txd_cmd | txd_lower | 4);
2038                         ctxd->upper.data =
2039                             htole32(txd_upper);
2040                         last = i;
2041                         if (++i == adapter->num_tx_desc)
2042                                 i = 0;
2043                 } else {
2044                         ctxd->buffer_addr = htole64(seg_addr);
2045                         ctxd->lower.data = htole32(
2046                             adapter->txd_cmd | txd_lower | seg_len);
2047                         ctxd->upper.data =
2048                             htole32(txd_upper);
2049                         last = i;
2050                         if (++i == adapter->num_tx_desc)
2051                                 i = 0;
2052                 }
2053                 tx_buffer->m_head = NULL;
2054                 tx_buffer->next_eop = -1;
2055         }
2056
2057         txr->next_avail_desc = i;
2058         txr->tx_avail -= nsegs;
2059         if (tso_desc) /* TSO used an extra for sentinel */
2060                 txr->tx_avail -= txd_used;
2061
2062         tx_buffer->m_head = m_head;
2063         /*
2064         ** Here we swap the map so the last descriptor,
2065         ** which gets the completion interrupt, has the
2066         ** real map, and the first descriptor gets the
2067         ** unused map from this descriptor.
2068         */
2069         tx_buffer_mapped->map = tx_buffer->map;
2070         tx_buffer->map = map;
2071         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2072
2073         /*
2074          * Last Descriptor of Packet
2075          * needs End Of Packet (EOP)
2076          * and Report Status (RS)
2077          */
2078         ctxd->lower.data |=
2079             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2080         /*
2081          * Keep track in the first buffer which
2082          * descriptor will be written back
2083          */
2084         tx_buffer = &txr->tx_buffers[first];
2085         tx_buffer->next_eop = last;
2086         /* Update the watchdog time early and often */
2087         txr->watchdog_time = ticks;
2088
2089         /*
2090          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2091          * that this frame is available to transmit.
2092          */
2093         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2094             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2095         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2096
2097         return (0);
2098 }
2099
2100 static void
2101 em_set_promisc(struct adapter *adapter)
2102 {
2103         struct ifnet    *ifp = adapter->ifp;
2104         u32             reg_rctl;
2105
2106         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2107
2108         if (ifp->if_flags & IFF_PROMISC) {
2109                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2110                 /* Turn this on if you want to see bad packets */
2111                 if (em_debug_sbp)
2112                         reg_rctl |= E1000_RCTL_SBP;
2113                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2114         } else if (ifp->if_flags & IFF_ALLMULTI) {
2115                 reg_rctl |= E1000_RCTL_MPE;
2116                 reg_rctl &= ~E1000_RCTL_UPE;
2117                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2118         }
2119 }
2120
2121 static void
2122 em_disable_promisc(struct adapter *adapter)
2123 {
2124         u32     reg_rctl;
2125
2126         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2127
2128         reg_rctl &=  (~E1000_RCTL_UPE);
2129         reg_rctl &=  (~E1000_RCTL_MPE);
2130         reg_rctl &=  (~E1000_RCTL_SBP);
2131         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2132 }
2133
2134
2135 /*********************************************************************
2136  *  Multicast Update
2137  *
2138  *  This routine is called whenever the multicast address list is updated.
2139  *
2140  **********************************************************************/
2141
2142 static void
2143 em_set_multi(struct adapter *adapter)
2144 {
2145         struct ifnet    *ifp = adapter->ifp;
2146         struct ifmultiaddr *ifma;
2147         u32 reg_rctl = 0;
2148         u8  *mta; /* Multicast array memory */
2149         int mcnt = 0;
2150
2151         IOCTL_DEBUGOUT("em_set_multi: begin");
2152
2153         mta = adapter->mta;
2154         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2155
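        /*
         * (The 82542 rev 2.0 cannot have its multicast table updated
         * while the receiver is enabled, so the code below parks the
         * receiver in reset -- RCTL.RST, with MWI off -- programs the
         * table, and then restores the previous state.)
         */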
2156         if (adapter->hw.mac.type == e1000_82542 && 
2157             adapter->hw.revision_id == E1000_REVISION_2) {
2158                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2159                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2160                         e1000_pci_clear_mwi(&adapter->hw);
2161                 reg_rctl |= E1000_RCTL_RST;
2162                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2163                 msec_delay(5);
2164         }
2165
2166 #if __FreeBSD_version < 800000
2167         IF_ADDR_LOCK(ifp);
2168 #else
2169         if_maddr_rlock(ifp);
2170 #endif
2171         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2172                 if (ifma->ifma_addr->sa_family != AF_LINK)
2173                         continue;
2174
2175                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2176                         break;
2177
2178                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2179                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2180                 mcnt++;
2181         }
2182 #if __FreeBSD_version < 800000
2183         IF_ADDR_UNLOCK(ifp);
2184 #else
2185         if_maddr_runlock(ifp);
2186 #endif
2187         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2188                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2189                 reg_rctl |= E1000_RCTL_MPE;
2190                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2191         } else
2192                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2193
2194         if (adapter->hw.mac.type == e1000_82542 && 
2195             adapter->hw.revision_id == E1000_REVISION_2) {
2196                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2197                 reg_rctl &= ~E1000_RCTL_RST;
2198                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2199                 msec_delay(5);
2200                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2201                         e1000_pci_set_mwi(&adapter->hw);
2202         }
2203 }
2204
2205
2206 /*********************************************************************
2207  *  Timer routine
2208  *
2209  *  This routine checks for link status and updates statistics.
2210  *
2211  **********************************************************************/
2212
2213 static void
2214 em_local_timer(void *arg)
2215 {
2216         struct adapter  *adapter = arg;
2217         struct ifnet    *ifp = adapter->ifp;
2218         struct tx_ring  *txr = adapter->tx_rings;
2219         struct rx_ring  *rxr = adapter->rx_rings;
2220         u32             trigger;
2221
2222         EM_CORE_LOCK_ASSERT(adapter);
2223
2224         em_update_link_status(adapter);
2225         em_update_stats_counters(adapter);
2226
2227         /* Reset LAA into RAR[0] on 82571 */
2228         if ((adapter->hw.mac.type == e1000_82571) &&
2229             e1000_get_laa_state_82571(&adapter->hw))
2230                 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2231
2232         /* Mask to use in the irq trigger */
2233         if (adapter->msix_mem)
2234                 trigger = rxr->ims; /* RX for 82574 */
2235         else
2236                 trigger = E1000_ICS_RXDMT0;
2237
2238         /*
2239         ** Check on the state of the TX queue(s); this
2240         ** can be done without the lock because it's RO
2241         ** and the HUNG state will be static if set.
2242         */
2243         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2244                 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2245                     (adapter->pause_frames == 0))
2246                         goto hung;
2247                 /* Schedule a TX tasklet if needed */
2248                 if (txr->tx_avail <= EM_MAX_SCATTER)
2249                         taskqueue_enqueue(txr->tq, &txr->tx_task);
2250         }
2251         
2252         adapter->pause_frames = 0;
2253         callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2254 #ifndef DEVICE_POLLING
2255         /* Trigger an RX interrupt to guarantee mbuf refresh */
2256         E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2257 #endif
2258         return;
2259 hung:
2260         /* Looks like we're hung */
2261         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2262         device_printf(adapter->dev,
2263             "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2264             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2265             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2266         device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2267             "Next TX to Clean = %d\n",
2268             txr->me, txr->tx_avail, txr->next_to_clean);
2269         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2270         adapter->watchdog_events++;
2271         adapter->pause_frames = 0;
2272         em_init_locked(adapter);
2273 }
2274
2275
2276 static void
2277 em_update_link_status(struct adapter *adapter)
2278 {
2279         struct e1000_hw *hw = &adapter->hw;
2280         struct ifnet *ifp = adapter->ifp;
2281         device_t dev = adapter->dev;
2282         struct tx_ring *txr = adapter->tx_rings;
2283         u32 link_check = 0;
2284
2285         /* Get the cached link value or read phy for real */
2286         switch (hw->phy.media_type) {
2287         case e1000_media_type_copper:
2288                 if (hw->mac.get_link_status) {
2289                         /* Do the work to read phy */
2290                         e1000_check_for_link(hw);
2291                         link_check = !hw->mac.get_link_status;
2292                         if (link_check) /* ESB2 fix */
2293                                 e1000_cfg_on_link_up(hw);
2294                 } else
2295                         link_check = TRUE;
2296                 break;
2297         case e1000_media_type_fiber:
2298                 e1000_check_for_link(hw);
2299                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2300                                  E1000_STATUS_LU);
2301                 break;
2302         case e1000_media_type_internal_serdes:
2303                 e1000_check_for_link(hw);
2304                 link_check = adapter->hw.mac.serdes_has_link;
2305                 break;
2306         default:
2307         case e1000_media_type_unknown:
2308                 break;
2309         }
2310
2311         /* Now check for a transition */
2312         if (link_check && (adapter->link_active == 0)) {
2313                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2314                     &adapter->link_duplex);
2315                 /* Check if we must disable SPEED_MODE bit on PCI-E */
2316                 if ((adapter->link_speed != SPEED_1000) &&
2317                     ((hw->mac.type == e1000_82571) ||
2318                     (hw->mac.type == e1000_82572))) {
2319                         int tarc0;
2320                         tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2321                         tarc0 &= ~SPEED_MODE_BIT;
2322                         E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2323                 }
2324                 if (bootverbose)
2325                         device_printf(dev, "Link is up %d Mbps %s\n",
2326                             adapter->link_speed,
2327                             ((adapter->link_duplex == FULL_DUPLEX) ?
2328                             "Full Duplex" : "Half Duplex"));
2329                 adapter->link_active = 1;
2330                 adapter->smartspeed = 0;
2331                 ifp->if_baudrate = adapter->link_speed * 1000000;
2332                 if_link_state_change(ifp, LINK_STATE_UP);
2333         } else if (!link_check && (adapter->link_active == 1)) {
2334                 ifp->if_baudrate = adapter->link_speed = 0;
2335                 adapter->link_duplex = 0;
2336                 if (bootverbose)
2337                         device_printf(dev, "Link is Down\n");
2338                 adapter->link_active = 0;
2339                 /* Link down, disable watchdog */
2340                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2341                         txr->queue_status = EM_QUEUE_IDLE;
2342                 if_link_state_change(ifp, LINK_STATE_DOWN);
2343         }
2344 }
2345
2346 /*********************************************************************
2347  *
2348  *  This routine disables all traffic on the adapter by issuing a
2349  *  global reset on the MAC and deallocates TX/RX buffers.
2350  *
2351  *  This routine should always be called with BOTH the CORE
2352  *  and TX locks.
2353  **********************************************************************/
2354
2355 static void
2356 em_stop(void *arg)
2357 {
2358         struct adapter  *adapter = arg;
2359         struct ifnet    *ifp = adapter->ifp;
2360         struct tx_ring  *txr = adapter->tx_rings;
2361
2362         EM_CORE_LOCK_ASSERT(adapter);
2363
2364         INIT_DEBUGOUT("em_stop: begin");
2365
2366         em_disable_intr(adapter);
2367         callout_stop(&adapter->timer);
2368
2369         /* Tell the stack that the interface is no longer active */
2370         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2371         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2372
2373         /* Unarm watchdog timer. */
2374         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2375                 EM_TX_LOCK(txr);
2376                 txr->queue_status = EM_QUEUE_IDLE;
2377                 EM_TX_UNLOCK(txr);
2378         }
2379
2380         e1000_reset_hw(&adapter->hw);
2381         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2382
2383         e1000_led_off(&adapter->hw);
2384         e1000_cleanup_led(&adapter->hw);
2385 }
2386
2387
2388 /*********************************************************************
2389  *
2390  *  Determine hardware revision.
2391  *
2392  **********************************************************************/
2393 static void
2394 em_identify_hardware(struct adapter *adapter)
2395 {
2396         device_t dev = adapter->dev;
2397
2398         /* Make sure our PCI config space has the necessary stuff set */
2399         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2400         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2401             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2402                 device_printf(dev, "Memory Access and/or Bus Master bits "
2403                     "were not set!\n");
2404                 adapter->hw.bus.pci_cmd_word |=
2405                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2406                 pci_write_config(dev, PCIR_COMMAND,
2407                     adapter->hw.bus.pci_cmd_word, 2);
2408         }
2409
2410         /* Save off the information about this board */
2411         adapter->hw.vendor_id = pci_get_vendor(dev);
2412         adapter->hw.device_id = pci_get_device(dev);
2413         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2414         adapter->hw.subsystem_vendor_id =
2415             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2416         adapter->hw.subsystem_device_id =
2417             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2418
2419         /* Do Shared Code Init and Setup */
2420         if (e1000_set_mac_type(&adapter->hw)) {
2421                 device_printf(dev, "Setup init failure\n");
2422                 return;
2423         }
2424 }
2425
2426 static int
2427 em_allocate_pci_resources(struct adapter *adapter)
2428 {
2429         device_t        dev = adapter->dev;
2430         int             rid;
2431
2432         rid = PCIR_BAR(0);
2433         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2434             &rid, RF_ACTIVE);
2435         if (adapter->memory == NULL) {
2436                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2437                 return (ENXIO);
2438         }
2439         adapter->osdep.mem_bus_space_tag =
2440             rman_get_bustag(adapter->memory);
2441         adapter->osdep.mem_bus_space_handle =
2442             rman_get_bushandle(adapter->memory);
2443         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2444
2445         /* Default to a single queue */
2446         adapter->num_queues = 1;
2447
2448         /*
2449          * Setup MSI/X or MSI if PCI Express
2450          */
2451         adapter->msix = em_setup_msix(adapter);
2452
2453         adapter->hw.back = &adapter->osdep;
2454
2455         return (0);
2456 }
2457
2458 /*********************************************************************
2459  *
2460  *  Setup the Legacy or MSI Interrupt handler
2461  *
2462  **********************************************************************/
2463 int
2464 em_allocate_legacy(struct adapter *adapter)
2465 {
2466         device_t dev = adapter->dev;
2467         struct tx_ring  *txr = adapter->tx_rings;
2468         int error, rid = 0;
2469
2470         /* Manually turn off all interrupts */
2471         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2472
2473         if (adapter->msix == 1) /* using MSI */
2474                 rid = 1;
2475         /* We allocate a single interrupt resource */
2476         adapter->res = bus_alloc_resource_any(dev,
2477             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2478         if (adapter->res == NULL) {
2479                 device_printf(dev, "Unable to allocate bus resource: "
2480                     "interrupt\n");
2481                 return (ENXIO);
2482         }
2483
2484         /*
2485          * Allocate a fast interrupt and the associated
2486          * deferred processing contexts.
2487          */
2488         TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2489         adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2490             taskqueue_thread_enqueue, &adapter->tq);
2491         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2492             device_get_nameunit(adapter->dev));
2493         /* Use a TX-only tasklet for the local timer */
2494         TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2495         txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2496             taskqueue_thread_enqueue, &txr->tq);
2497         taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2498             device_get_nameunit(adapter->dev));
2499         TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2500         if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2501             em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2502                 device_printf(dev, "Failed to register fast interrupt "
2503                             "handler: %d\n", error);
2504                 taskqueue_free(adapter->tq);
2505                 adapter->tq = NULL;
2506                 return (error);
2507         }
2508         
2509         return (0);
2510 }
2511
2512 /*********************************************************************
2513  *
2514  *  Setup the MSIX Interrupt handlers
2515  *   This is not really Multiqueue; rather,
2516  *   it's just separate interrupt vectors
2517  *   for TX, RX, and Link.
2518  *
2519  **********************************************************************/
2520 int
2521 em_allocate_msix(struct adapter *adapter)
2522 {
2523         device_t        dev = adapter->dev;
2524         struct          tx_ring *txr = adapter->tx_rings;
2525         struct          rx_ring *rxr = adapter->rx_rings;
2526         int             error, rid, vector = 0;
2527
2528
2529         /* Make sure all interrupts are disabled */
2530         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2531
2532         /* First set up ring resources */
2533         for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2534
2535                 /* RX ring */
2536                 rid = vector + 1;
2537
2538                 rxr->res = bus_alloc_resource_any(dev,
2539                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2540                 if (rxr->res == NULL) {
2541                         device_printf(dev,
2542                             "Unable to allocate bus resource: "
2543                             "RX MSIX Interrupt %d\n", i);
2544                         return (ENXIO);
2545                 }
2546                 if ((error = bus_setup_intr(dev, rxr->res,
2547                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2548                     rxr, &rxr->tag)) != 0) {
2549                         device_printf(dev, "Failed to register RX handler\n");
2550                         return (error);
2551                 }
2552 #if __FreeBSD_version >= 800504
2553                 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2554 #endif
2555                 rxr->msix = vector++; /* NOTE increment vector for TX */
2556                 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2557                 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2558                     taskqueue_thread_enqueue, &rxr->tq);
2559                 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2560                     device_get_nameunit(adapter->dev));
2561                 /*
2562                 ** Set the bit to enable interrupt
2563                 ** in E1000_IMS -- bits 20 and 21
2564                 ** are for RX0 and RX1, note this has
2565                 ** NOTHING to do with the MSIX vector
2566                 */
2567                 rxr->ims = 1 << (20 + i);
2568                 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2569
2570                 /* TX ring */
2571                 rid = vector + 1;
2572                 txr->res = bus_alloc_resource_any(dev,
2573                     SYS_RES_IRQ, &rid, RF_ACTIVE);
2574                 if (txr->res == NULL) {
2575                         device_printf(dev,
2576                             "Unable to allocate bus resource: "
2577                             "TX MSIX Interrupt %d\n", i);
2578                         return (ENXIO);
2579                 }
2580                 if ((error = bus_setup_intr(dev, txr->res,
2581                     INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2582                     txr, &txr->tag)) != 0) {
2583                         device_printf(dev, "Failed to register TX handler\n");
2584                         return (error);
2585                 }
2586 #if __FreeBSD_version >= 800504
2587                 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2588 #endif
2589                 txr->msix = vector++; /* Increment vector for next pass */
2590                 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2591                 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2592                     taskqueue_thread_enqueue, &txr->tq);
2593                 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2594                     device_get_nameunit(adapter->dev));
2595                 /*
2596                 ** Set the bit to enable interrupt
2597                 ** in E1000_IMS -- bits 22 and 23
2598                 ** are for TX0 and TX1, note this has
2599                 ** NOTHING to do with the MSIX vector
2600                 */
2601                 txr->ims = 1 << (22 + i);
2602                 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2603         }
2604
2605         /* Link interrupt */
2606         ++rid;
2607         adapter->res = bus_alloc_resource_any(dev,
2608             SYS_RES_IRQ, &rid, RF_ACTIVE);
2609         if (!adapter->res) {
2610                 device_printf(dev,"Unable to allocate "
2611                     "bus resource: Link interrupt [%d]\n", rid);
2612                 return (ENXIO);
2613         }
2614         /* Set the link handler function */
2615         error = bus_setup_intr(dev, adapter->res,
2616             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2617             em_msix_link, adapter, &adapter->tag);
2618         if (error) {
2619                 adapter->res = NULL;
2620                 device_printf(dev, "Failed to register LINK handler\n");
2621                 return (error);
2622         }
2623 #if __FreeBSD_version >= 800504
2624         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2625 #endif
2626         adapter->linkvec = vector;
2627         adapter->ivars |=  (8 | vector) << 16;
2628         adapter->ivars |= 0x80000000;
2629
2630         return (0);
2631 }
2632
2633
2634 static void
2635 em_free_pci_resources(struct adapter *adapter)
2636 {
2637         device_t        dev = adapter->dev;
2638         struct tx_ring  *txr;
2639         struct rx_ring  *rxr;
2640         int             rid;
2641
2642
2643         /*
2644         ** Release all the queue interrupt resources:
2645         */
2646         for (int i = 0; i < adapter->num_queues; i++) {
2647                 txr = &adapter->tx_rings[i];
2648                 rxr = &adapter->rx_rings[i];
2649                 /* an early abort? */
2650                 if ((txr == NULL) || (rxr == NULL))
2651                         break;
2652                 rid = txr->msix + 1;
2653                 if (txr->tag != NULL) {
2654                         bus_teardown_intr(dev, txr->res, txr->tag);
2655                         txr->tag = NULL;
2656                 }
2657                 if (txr->res != NULL)
2658                         bus_release_resource(dev, SYS_RES_IRQ,
2659                             rid, txr->res);
2660                 rid = rxr->msix + 1;
2661                 if (rxr->tag != NULL) {
2662                         bus_teardown_intr(dev, rxr->res, rxr->tag);
2663                         rxr->tag = NULL;
2664                 }
2665                 if (rxr->res != NULL)
2666                         bus_release_resource(dev, SYS_RES_IRQ,
2667                             rid, rxr->res);
2668         }
2669
2670         if (adapter->linkvec) /* we are doing MSIX */
2671                 rid = adapter->linkvec + 1;
2672         else
2673                 rid = (adapter->msix != 0) ? 1 : 0;
2674
2675         if (adapter->tag != NULL) {
2676                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2677                 adapter->tag = NULL;
2678         }
2679
2680         if (adapter->res != NULL)
2681                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2682
2683
2684         if (adapter->msix)
2685                 pci_release_msi(dev);
2686
2687         if (adapter->msix_mem != NULL)
2688                 bus_release_resource(dev, SYS_RES_MEMORY,
2689                     PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2690
2691         if (adapter->memory != NULL)
2692                 bus_release_resource(dev, SYS_RES_MEMORY,
2693                     PCIR_BAR(0), adapter->memory);
2694
2695         if (adapter->flash != NULL)
2696                 bus_release_resource(dev, SYS_RES_MEMORY,
2697                     EM_FLASH, adapter->flash);
2698 }
2699
2700 /*
2701  * Setup MSI or MSI/X
2702  */
2703 static int
2704 em_setup_msix(struct adapter *adapter)
2705 {
2706         device_t dev = adapter->dev;
2707         int val = 0;
2708
2709         /*
2710         ** Setup MSI/X for Hartwell: tests have shown
2711         ** use of two queues to be unstable, and to
2712         ** provide no great gain anyway, so we simply
2713         ** separate the interrupts and use a single queue.
2714         */
2715         if ((adapter->hw.mac.type == e1000_82574) &&
2716             (em_enable_msix == TRUE)) {
2717                 /* Map the MSIX BAR */
2718                 int rid = PCIR_BAR(EM_MSIX_BAR);
2719                 adapter->msix_mem = bus_alloc_resource_any(dev,
2720                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2721                 if (!adapter->msix_mem) {
2722                         /* May not be enabled */
2723                         device_printf(adapter->dev,
2724                             "Unable to map MSIX table\n");
2725                         goto msi;
2726                 }
2727                 val = pci_msix_count(dev); 
2728                 /* We only need 3 vectors */
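                /* (one each for RX, TX, and link; see em_allocate_msix) */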
2729                 if (val > 3)
2730                         val = 3;
2731                 if ((val != 3) && (val != 5)) {
2732                         bus_release_resource(dev, SYS_RES_MEMORY,
2733                             PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2734                         adapter->msix_mem = NULL;
2735                         device_printf(adapter->dev,
2736                             "MSIX: incorrect vectors, using MSI\n");
2737                         goto msi;
2738                 }
2739
2740                 if (pci_alloc_msix(dev, &val) == 0) {
2741                         device_printf(adapter->dev,
2742                             "Using MSIX interrupts "
2743                             "with %d vectors\n", val);
2744                 }
2745
2746                 return (val);
2747         }
2748 msi:
2749         val = pci_msi_count(dev);
2750         if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2751                 adapter->msix = 1;
2752                 device_printf(adapter->dev, "Using an MSI interrupt\n");
2753                 return (val);
2754         }
2755         /* Should only happen due to manual configuration */
2756         device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2757         return (0);
2758 }
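/*
 * Illustrative sketch (not a verbatim call site): the attach path
 * records the return value to choose the interrupt setup, e.g.
 *
 *      adapter->msix = em_setup_msix(adapter);
 *      if (adapter->msix > 1)
 *              error = em_allocate_msix(adapter);
 *      else
 *              error = em_allocate_legacy(adapter);
 *
 * A return of 0 selects a legacy INTx line, 1 selects MSI, and a
 * larger value is the number of MSIX vectors obtained.
 */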
2759
2760
2761 /*********************************************************************
2762  *
2763  *  Initialize the hardware to a configuration
2764  *  as specified by the adapter structure.
2765  *
2766  **********************************************************************/
2767 static void
2768 em_reset(struct adapter *adapter)
2769 {
2770         device_t        dev = adapter->dev;
2771         struct ifnet    *ifp = adapter->ifp;
2772         struct e1000_hw *hw = &adapter->hw;
2773         u16             rx_buffer_size;
2774         u32             pba;
2775
2776         INIT_DEBUGOUT("em_reset: begin");
2777
2778         /* Set up smart power down as default off on newer adapters. */
2779         if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2780             hw->mac.type == e1000_82572)) {
2781                 u16 phy_tmp = 0;
2782
2783                 /* Speed up time to link by disabling smart power down. */
2784                 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2785                 phy_tmp &= ~IGP02E1000_PM_SPD;
2786                 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2787         }
2788
2789         /*
2790          * Packet Buffer Allocation (PBA)
2791          * Writing PBA sets the receive portion of the buffer
2792          * the remainder is used for the transmit buffer.
2793          */
2794         switch (hw->mac.type) {
2795         /* Total Packet Buffer on these is 48K */
2796         case e1000_82571:
2797         case e1000_82572:
2798         case e1000_80003es2lan:
2799                 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2800                 break;
2801         case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2802                 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2803                 break;
2804         case e1000_82574:
2805         case e1000_82583:
2806                 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2807                 break;
2808         case e1000_ich8lan:
2809                 pba = E1000_PBA_8K;
2810                 break;
2811         case e1000_ich9lan:
2812         case e1000_ich10lan:
2813                 /* Boost Receive side for jumbo frames */
2814                 if (adapter->max_frame_size > 4096)
2815                         pba = E1000_PBA_14K;
2816                 else
2817                         pba = E1000_PBA_10K;
2818                 break;
2819         case e1000_pchlan:
2820         case e1000_pch2lan:
2821                 pba = E1000_PBA_26K;
2822                 break;
2823         default:
2824                 if (adapter->max_frame_size > 8192)
2825                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2826                 else
2827                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2828         }
2829         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2830
2831         /*
2832          * These parameters control the automatic generation (Tx) and
2833          * response (Rx) to Ethernet PAUSE frames.
2834          * - High water mark should allow for at least two frames to be
2835          *   received after sending an XOFF.
2836          * - Low water mark works best when it is very near the high water mark.
2837          *   This allows the receiver to restart by sending XON when it has
2838          *   drained a bit. Here we use an arbitrary value of 1500 which will
2839          *   restart after one full frame is pulled from the buffer. There
2840          *   could be several smaller frames in the buffer and if so they will
2841          *   not trigger the XON until their total number reduces the buffer
2842          *   by 1500.
2843          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2844          */
2845         rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2846         hw->fc.high_water = rx_buffer_size -
2847             roundup2(adapter->max_frame_size, 1024);
2848         hw->fc.low_water = hw->fc.high_water - 1500;
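        /*
         * Worked example (illustrative): with a 32K Rx allocation the
         * PBA readback yields rx_buffer_size = 32 << 10 = 32768; for a
         * standard 1518-byte max frame, roundup2(1518, 1024) = 2048,
         * giving high_water = 30720 and low_water = 29220.
         */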
2849
2850         if (adapter->fc) /* locally set flow control value? */
2851                 hw->fc.requested_mode = adapter->fc;
2852         else
2853                 hw->fc.requested_mode = e1000_fc_full;
2854
2855         if (hw->mac.type == e1000_80003es2lan)
2856                 hw->fc.pause_time = 0xFFFF;
2857         else
2858                 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2859
2860         hw->fc.send_xon = TRUE;
2861
2862         /* Device specific overrides/settings */
2863         switch (hw->mac.type) {
2864         case e1000_pchlan:
2865                 /* Workaround: no TX flow ctrl for PCH */
2866                 hw->fc.requested_mode = e1000_fc_rx_pause;
2867                 hw->fc.pause_time = 0xFFFF; /* override */
2868                 if (ifp->if_mtu > ETHERMTU) {
2869                         hw->fc.high_water = 0x3500;
2870                         hw->fc.low_water = 0x1500;
2871                 } else {
2872                         hw->fc.high_water = 0x5000;
2873                         hw->fc.low_water = 0x3000;
2874                 }
2875                 hw->fc.refresh_time = 0x1000;
2876                 break;
2877         case e1000_pch2lan:
2878                 hw->fc.high_water = 0x5C20;
2879                 hw->fc.low_water = 0x5048;
2880                 hw->fc.pause_time = 0x0650;
2881                 hw->fc.refresh_time = 0x0400;
2882                 /* Jumbos need adjusted PBA */
2883                 if (ifp->if_mtu > ETHERMTU)
2884                         E1000_WRITE_REG(hw, E1000_PBA, 12);
2885                 else
2886                         E1000_WRITE_REG(hw, E1000_PBA, 26);
2887                 break;
2888         case e1000_ich9lan:
2889         case e1000_ich10lan:
2890                 if (ifp->if_mtu > ETHERMTU) {
2891                         hw->fc.high_water = 0x2800;
2892                         hw->fc.low_water = hw->fc.high_water - 8;
2893                         break;
2894                 } 
2895                 /* else fall thru */
2896         default:
2897                 if (hw->mac.type == e1000_80003es2lan)
2898                         hw->fc.pause_time = 0xFFFF;
2899                 break;
2900         }
2901
2902         /* Issue a global reset */
2903         e1000_reset_hw(hw);
2904         E1000_WRITE_REG(hw, E1000_WUC, 0);
2905         em_disable_aspm(adapter);
2906         /* and a re-init */
2907         if (e1000_init_hw(hw) < 0) {
2908                 device_printf(dev, "Hardware Initialization Failed\n");
2909                 return;
2910         }
2911
2912         E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2913         e1000_get_phy_info(hw);
2914         e1000_check_for_link(hw);
2915         return;
2916 }
2917
2918 /*********************************************************************
2919  *
2920  *  Setup networking device structure and register an interface.
2921  *
2922  **********************************************************************/
2923 static int
2924 em_setup_interface(device_t dev, struct adapter *adapter)
2925 {
2926         struct ifnet   *ifp;
2927
2928         INIT_DEBUGOUT("em_setup_interface: begin");
2929
2930         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2931         if (ifp == NULL) {
2932                 device_printf(dev, "can not allocate ifnet structure\n");
2933                 return (-1);
2934         }
2935         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2936         ifp->if_init =  em_init;
2937         ifp->if_softc = adapter;
2938         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2939         ifp->if_ioctl = em_ioctl;
2940 #ifdef EM_MULTIQUEUE
2941         /* Multiqueue stack interface */
2942         ifp->if_transmit = em_mq_start;
2943         ifp->if_qflush = em_qflush;
2944 #else
2945         ifp->if_start = em_start;
2946         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2947         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2948         IFQ_SET_READY(&ifp->if_snd);
2949 #endif  
2950
2951         ether_ifattach(ifp, adapter->hw.mac.addr);
2952
2953         ifp->if_capabilities = ifp->if_capenable = 0;
2954
2955
2956         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2957         ifp->if_capabilities |= IFCAP_TSO4;
2958         /*
2959          * Tell the upper layer(s) we
2960          * support full VLAN capability
2961          */
2962         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2963         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2964                              |  IFCAP_VLAN_HWTSO
2965                              |  IFCAP_VLAN_MTU;
2966         ifp->if_capenable = ifp->if_capabilities;
2967
2968         /*
2969         ** Don't turn this on by default, if vlans are
2970         ** created on another pseudo device (eg. lagg)
2971         ** then vlan events are not passed thru, breaking
2972         ** operation, but with HW FILTER off it works. If
2973         ** using vlans directly on the em driver you can
2974         ** enable this and get full hardware tag filtering.
2975         */
2976         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
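        /*
         * For example (illustrative), the filter can then be toggled
         * per interface from userland:
         *
         *      ifconfig em0 vlanhwfilter
         *      ifconfig em0 -vlanhwfilter
         */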
2977
2978 #ifdef DEVICE_POLLING
2979         ifp->if_capabilities |= IFCAP_POLLING;
2980 #endif
2981
2982         /* Enable only WOL MAGIC by default */
2983         if (adapter->wol) {
2984                 ifp->if_capabilities |= IFCAP_WOL;
2985                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2986         }
2987                 
2988         /*
2989          * Specify the media types supported by this adapter and register
2990          * callbacks to update media and link information
2991          */
2992         ifmedia_init(&adapter->media, IFM_IMASK,
2993             em_media_change, em_media_status);
2994         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2995             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2996                 u_char fiber_type = IFM_1000_SX;        /* default type */
2997
2998                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2999                             0, NULL);
3000                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3001         } else {
3002                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3003                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3004                             0, NULL);
3005                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3006                             0, NULL);
3007                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3008                             0, NULL);
3009                 if (adapter->hw.phy.type != e1000_phy_ife) {
3010                         ifmedia_add(&adapter->media,
3011                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3012                         ifmedia_add(&adapter->media,
3013                                 IFM_ETHER | IFM_1000_T, 0, NULL);
3014                 }
3015         }
3016         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3017         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3018         return (0);
3019 }
3020
3021
3022 /*
3023  * Manage DMA'able memory.
3024  */
3025 static void
3026 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3027 {
3028         if (error)
3029                 return;
3030         *(bus_addr_t *) arg = segs[0].ds_addr;
3031 }
3032
3033 static int
3034 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3035         struct em_dma_alloc *dma, int mapflags)
3036 {
3037         int error;
3038
3039         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3040                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
3041                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3042                                 BUS_SPACE_MAXADDR,      /* highaddr */
3043                                 NULL, NULL,             /* filter, filterarg */
3044                                 size,                   /* maxsize */
3045                                 1,                      /* nsegments */
3046                                 size,                   /* maxsegsize */
3047                                 0,                      /* flags */
3048                                 NULL,                   /* lockfunc */
3049                                 NULL,                   /* lockarg */
3050                                 &dma->dma_tag);
3051         if (error) {
3052                 device_printf(adapter->dev,
3053                     "%s: bus_dma_tag_create failed: %d\n",
3054                     __func__, error);
3055                 goto fail_0;
3056         }
3057
3058         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3059             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3060         if (error) {
3061                 device_printf(adapter->dev,
3062                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3063                     __func__, (uintmax_t)size, error);
3064                 goto fail_2;
3065         }
3066
3067         dma->dma_paddr = 0;
3068         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3069             size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3070         if (error || dma->dma_paddr == 0) {
3071                 device_printf(adapter->dev,
3072                     "%s: bus_dmamap_load failed: %d\n",
3073                     __func__, error);
3074                 goto fail_3;
3075         }
3076
3077         return (0);
3078
3079 fail_3:
3080         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3081 fail_2:
3082         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3083         bus_dma_tag_destroy(dma->dma_tag);
3084 fail_0:
3085         dma->dma_map = NULL;
3086         dma->dma_tag = NULL;
3087
3088         return (error);
3089 }
3090
3091 static void
3092 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3093 {
3094         if (dma->dma_tag == NULL)
3095                 return;
3096         if (dma->dma_map != NULL) {
3097                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3098                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3099                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3100                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3101                 dma->dma_map = NULL;
3102         }
3103         bus_dma_tag_destroy(dma->dma_tag);
3104         dma->dma_tag = NULL;
3105 }
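/*
 * Illustrative use of the pair above (a sketch, not a verbatim call
 * site):
 *
 *      struct em_dma_alloc ring;
 *
 *      if (em_dma_malloc(adapter, size, &ring, BUS_DMA_NOWAIT) == 0) {
 *              ... use ring.dma_vaddr / ring.dma_paddr ...
 *              em_dma_free(adapter, &ring);
 *      }
 */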
3106
3107
3108 /*********************************************************************
3109  *
3110  *  Allocate memory for the transmit and receive rings, and then
3111  *  the descriptors associated with each, called only once at attach.
3112  *
3113  **********************************************************************/
3114 static int
3115 em_allocate_queues(struct adapter *adapter)
3116 {
3117         device_t                dev = adapter->dev;
3118         struct tx_ring          *txr = NULL;
3119         struct rx_ring          *rxr = NULL;
3120         int rsize, tsize, error = E1000_SUCCESS;
3121         int txconf = 0, rxconf = 0;
3122
3123
3124         /* Allocate the TX ring struct memory */
3125         if (!(adapter->tx_rings =
3126             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3127             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3128                 device_printf(dev, "Unable to allocate TX ring memory\n");
3129                 error = ENOMEM;
3130                 goto fail;
3131         }
3132
3133         /* Now allocate the RX */
3134         if (!(adapter->rx_rings =
3135             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3136             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3137                 device_printf(dev, "Unable to allocate RX ring memory\n");
3138                 error = ENOMEM;
3139                 goto rx_fail;
3140         }
3141
3142         tsize = roundup2(adapter->num_tx_desc *
3143             sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3144         /*
3145          * Now set up the TX queues, txconf is needed to handle the
3146          * possibility that things fail midcourse and we need to
3147          * unwind the allocations gracefully.
3148          */ 
3149         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3150                 /* Set up some basics */
3151                 txr = &adapter->tx_rings[i];
3152                 txr->adapter = adapter;
3153                 txr->me = i;
3154
3155                 /* Initialize the TX lock */
3156                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3157                     device_get_nameunit(dev), txr->me);
3158                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3159
3160                 if (em_dma_malloc(adapter, tsize,
3161                         &txr->txdma, BUS_DMA_NOWAIT)) {
3162                         device_printf(dev,
3163                             "Unable to allocate TX Descriptor memory\n");
3164                         error = ENOMEM;
3165                         goto err_tx_desc;
3166                 }
3167                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3168                 bzero((void *)txr->tx_base, tsize);
3169
3170                 if (em_allocate_transmit_buffers(txr)) {
3171                         device_printf(dev,
3172                             "Critical Failure setting up transmit buffers\n");
3173                         error = ENOMEM;
3174                         goto err_tx_desc;
3175                 }
3176 #if __FreeBSD_version >= 800000
3177                 /* Allocate a buf ring */
3178                 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3179                     M_WAITOK, &txr->tx_mtx);
3180 #endif
3181         }
3182
3183         /*
3184          * Next the RX queues...
3185          */ 
3186         rsize = roundup2(adapter->num_rx_desc *
3187             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3188         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3189                 rxr = &adapter->rx_rings[i];
3190                 rxr->adapter = adapter;
3191                 rxr->me = i;
3192
3193                 /* Initialize the RX lock */
3194                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3195                     device_get_nameunit(dev), rxr->me);
3196                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3197
3198                 if (em_dma_malloc(adapter, rsize,
3199                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3200                         device_printf(dev,
3201                             "Unable to allocate RX Descriptor memory\n");
3202                         error = ENOMEM;
3203                         goto err_rx_desc;
3204                 }
3205                 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3206                 bzero((void *)rxr->rx_base, rsize);
3207
3208                 /* Allocate receive buffers for the ring */
3209                 if (em_allocate_receive_buffers(rxr)) {
3210                         device_printf(dev,
3211                             "Critical Failure setting up receive buffers\n");
3212                         error = ENOMEM;
3213                         goto err_rx_desc;
3214                 }
3215         }
3216
3217         return (0);
3218
3219 err_rx_desc:
3220         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3221                 em_dma_free(adapter, &rxr->rxdma);
3222 err_tx_desc:
3223         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3224                 em_dma_free(adapter, &txr->txdma);
3225         free(adapter->rx_rings, M_DEVBUF);
3226 rx_fail:
3227 #if __FreeBSD_version >= 800000
3228         if (txr != NULL && txr->br != NULL)
3229                 buf_ring_free(txr->br, M_DEVBUF);
3230 #endif
3230         free(adapter->tx_rings, M_DEVBUF);
3231 fail:
3232         return (error);
3233 }
3234
3235
3236 /*********************************************************************
3237  *
3238  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3239  *  the information needed to transmit a packet on the wire. This is
3240  *  called only once at attach, setup is done every reset.
3241  *
3242  **********************************************************************/
3243 static int
3244 em_allocate_transmit_buffers(struct tx_ring *txr)
3245 {
3246         struct adapter *adapter = txr->adapter;
3247         device_t dev = adapter->dev;
3248         struct em_buffer *txbuf;
3249         int error, i;
3250
3251         /*
3252          * Setup DMA descriptor areas.
3253          */
3254         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3255                                1, 0,                    /* alignment, bounds */
3256                                BUS_SPACE_MAXADDR,       /* lowaddr */
3257                                BUS_SPACE_MAXADDR,       /* highaddr */
3258                                NULL, NULL,              /* filter, filterarg */
3259                                EM_TSO_SIZE,             /* maxsize */
3260                                EM_MAX_SCATTER,          /* nsegments */
3261                                PAGE_SIZE,               /* maxsegsize */
3262                                0,                       /* flags */
3263                                NULL,                    /* lockfunc */
3264                                NULL,                    /* lockfuncarg */
3265                                &txr->txtag))) {
3266                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3267                 goto fail;
3268         }
3269
3270         if (!(txr->tx_buffers =
3271             (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3272             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3273                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3274                 error = ENOMEM;
3275                 goto fail;
3276         }
3277
3278         /* Create the descriptor buffer dma maps */
3279         txbuf = txr->tx_buffers;
3280         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3281                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3282                 if (error != 0) {
3283                         device_printf(dev, "Unable to create TX DMA map\n");
3284                         goto fail;
3285                 }
3286         }
3287
3288         return 0;
3289 fail:
3290         /* We free all, it handles case where we are in the middle */
3291         em_free_transmit_structures(adapter);
3292         return (error);
3293 }
3294
3295 /*********************************************************************
3296  *
3297  *  Initialize a transmit ring.
3298  *
3299  **********************************************************************/
3300 static void
3301 em_setup_transmit_ring(struct tx_ring *txr)
3302 {
3303         struct adapter *adapter = txr->adapter;
3304         struct em_buffer *txbuf;
3305         int i;
3306 #ifdef DEV_NETMAP
3307         struct netmap_adapter *na = NA(adapter->ifp);
3308         struct netmap_slot *slot;
3309 #endif /* DEV_NETMAP */
3310
3311         /* Clear the old descriptor contents */
3312         EM_TX_LOCK(txr);
3313 #ifdef DEV_NETMAP
3314         slot = netmap_reset(na, NR_TX, txr->me, 0);
3315 #endif /* DEV_NETMAP */
3316
3317         bzero((void *)txr->tx_base,
3318               (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3319         /* Reset indices */
3320         txr->next_avail_desc = 0;
3321         txr->next_to_clean = 0;
3322
3323         /* Free any existing tx buffers. */
3324         txbuf = txr->tx_buffers;
3325         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3326                 if (txbuf->m_head != NULL) {
3327                         bus_dmamap_sync(txr->txtag, txbuf->map,
3328                             BUS_DMASYNC_POSTWRITE);
3329                         bus_dmamap_unload(txr->txtag, txbuf->map);
3330                         m_freem(txbuf->m_head);
3331                         txbuf->m_head = NULL;
3332                 }
3333 #ifdef DEV_NETMAP
3334                 if (slot) {
3335                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3336                         uint64_t paddr;
3337                         void *addr;
3338
3339                         addr = PNMB(slot + si, &paddr);
3340                         txr->tx_base[i].buffer_addr = htole64(paddr);
3341                         /* reload the map for netmap mode */
3342                         netmap_load_map(txr->txtag, txbuf->map, addr);
3343                 }
3344 #endif /* DEV_NETMAP */
3345
3346                 /* clear the watch index */
3347                 txbuf->next_eop = -1;
3348         }
3349
3350         /* Set number of descriptors available */
3351         txr->tx_avail = adapter->num_tx_desc;
3352         txr->queue_status = EM_QUEUE_IDLE;
3353
3354         /* Clear checksum offload context. */
3355         txr->last_hw_offload = 0;
3356         txr->last_hw_ipcss = 0;
3357         txr->last_hw_ipcso = 0;
3358         txr->last_hw_tucss = 0;
3359         txr->last_hw_tucso = 0;
3360
3361         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3362             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3363         EM_TX_UNLOCK(txr);
3364 }
3365
3366 /*********************************************************************
3367  *
3368  *  Initialize all transmit rings.
3369  *
3370  **********************************************************************/
3371 static void
3372 em_setup_transmit_structures(struct adapter *adapter)
3373 {
3374         struct tx_ring *txr = adapter->tx_rings;
3375
3376         for (int i = 0; i < adapter->num_queues; i++, txr++)
3377                 em_setup_transmit_ring(txr);
3378
3379         return;
3380 }
3381
3382 /*********************************************************************
3383  *
3384  *  Enable transmit unit.
3385  *
3386  **********************************************************************/
3387 static void
3388 em_initialize_transmit_unit(struct adapter *adapter)
3389 {
3390         struct tx_ring  *txr = adapter->tx_rings;
3391         struct e1000_hw *hw = &adapter->hw;
3392         u32     tctl, tarc, tipg = 0;
3393
3394         INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3395
3396         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3397                 u64 bus_addr = txr->txdma.dma_paddr;
3398                 /* Base and Len of TX Ring */
3399                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3400                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3401                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3402                     (u32)(bus_addr >> 32));
3403                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3404                     (u32)bus_addr);
3405                 /* Init the HEAD/TAIL indices */
3406                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3407                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3408
3409                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3410                     E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3411                     E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3412
3413                 txr->queue_status = EM_QUEUE_IDLE;
3414         }
3415
3416         /* Set the default values for the Tx Inter Packet Gap timer */
3417         switch (adapter->hw.mac.type) {
3418         case e1000_80003es2lan:
3419                 tipg = DEFAULT_82543_TIPG_IPGR1;
3420                 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3421                     E1000_TIPG_IPGR2_SHIFT;
3422                 break;
3423         default:
3424                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3425                     (adapter->hw.phy.media_type ==
3426                     e1000_media_type_internal_serdes))
3427                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3428                 else
3429                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3430                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3431                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3432         }
3433
3434         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3435         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3436
3437         if (adapter->hw.mac.type >= e1000_82540)
3438                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3439                     adapter->tx_abs_int_delay.value);
3440
3441         if ((adapter->hw.mac.type == e1000_82571) ||
3442             (adapter->hw.mac.type == e1000_82572)) {
3443                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3444                 tarc |= SPEED_MODE_BIT;
3445                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3446         } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3447                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3448                 tarc |= 1;
3449                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3450                 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3451                 tarc |= 1;
3452                 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3453         }
3454
3455         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3456         if (adapter->tx_int_delay.value > 0)
3457                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3458
3459         /* Program the Transmit Control Register */
3460         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3461         tctl &= ~E1000_TCTL_CT;
3462         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3463                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3464
3465         if (adapter->hw.mac.type >= e1000_82571)
3466                 tctl |= E1000_TCTL_MULR;
3467
3468         /* This write will effectively turn on the transmit unit. */
3469         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3470
3471 }
3472
3473
3474 /*********************************************************************
3475  *
3476  *  Free all transmit rings.
3477  *
3478  **********************************************************************/
3479 static void
3480 em_free_transmit_structures(struct adapter *adapter)
3481 {
3482         struct tx_ring *txr = adapter->tx_rings;
3483
3484         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3485                 EM_TX_LOCK(txr);
3486                 em_free_transmit_buffers(txr);
3487                 em_dma_free(adapter, &txr->txdma);
3488                 EM_TX_UNLOCK(txr);
3489                 EM_TX_LOCK_DESTROY(txr);
3490         }
3491
3492         free(adapter->tx_rings, M_DEVBUF);
3493 }
3494
3495 /*********************************************************************
3496  *
3497  *  Free transmit ring related data structures.
3498  *
3499  **********************************************************************/
3500 static void
3501 em_free_transmit_buffers(struct tx_ring *txr)
3502 {
3503         struct adapter          *adapter = txr->adapter;
3504         struct em_buffer        *txbuf;
3505
3506         INIT_DEBUGOUT("free_transmit_ring: begin");
3507
3508         if (txr->tx_buffers == NULL)
3509                 return;
3510
3511         for (int i = 0; i < adapter->num_tx_desc; i++) {
3512                 txbuf = &txr->tx_buffers[i];
3513                 if (txbuf->m_head != NULL) {
3514                         bus_dmamap_sync(txr->txtag, txbuf->map,
3515                             BUS_DMASYNC_POSTWRITE);
3516                         bus_dmamap_unload(txr->txtag,
3517                             txbuf->map);
3518                         m_freem(txbuf->m_head);
3519                         txbuf->m_head = NULL;
3520                         if (txbuf->map != NULL) {
3521                                 bus_dmamap_destroy(txr->txtag,
3522                                     txbuf->map);
3523                                 txbuf->map = NULL;
3524                         }
3525                 } else if (txbuf->map != NULL) {
3526                         bus_dmamap_unload(txr->txtag,
3527                             txbuf->map);
3528                         bus_dmamap_destroy(txr->txtag,
3529                             txbuf->map);
3530                         txbuf->map = NULL;
3531                 }
3532         }
3533 #if __FreeBSD_version >= 800000
3534         if (txr->br != NULL)
3535                 buf_ring_free(txr->br, M_DEVBUF);
3536 #endif
3537         if (txr->tx_buffers != NULL) {
3538                 free(txr->tx_buffers, M_DEVBUF);
3539                 txr->tx_buffers = NULL;
3540         }
3541         if (txr->txtag != NULL) {
3542                 bus_dma_tag_destroy(txr->txtag);
3543                 txr->txtag = NULL;
3544         }
3545         return;
3546 }
3547
3548
3549 /*********************************************************************
3550  *  The offload context is protocol specific (TCP/UDP) and thus
3551  *  only needs to be set when the protocol changes. The occasion
3552  *  of a context change can be a performance detriment, and
3553  *  might be better just disabled. The reason arises in the way
3554  *  in which the controller supports pipelined requests from the
3555  *  Tx data DMA. Up to four requests can be pipelined, and they may
3556  *  belong to the same packet or to multiple packets. However all
3557  *  requests for one packet are issued before a request is issued
3558  *  for a subsequent packet and if a request for the next packet
3559  *  requires a context change, that request will be stalled
3560  *  until the previous request completes. This means setting up
3561  *  a new context effectively disables pipelined Tx data DMA which
3562  *  in turn greatly slows down performance when sending small
3563  *  frames.
3564  **********************************************************************/
3565 static void
3566 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3567     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3568 {
3569         struct adapter                  *adapter = txr->adapter;
3570         struct e1000_context_desc       *TXD = NULL;
3571         struct em_buffer                *tx_buffer;
3572         int                             cur, hdr_len;
3573         u32                             cmd = 0;
3574         u16                             offload = 0;
3575         u8                              ipcso, ipcss, tucso, tucss;
3576
3577         ipcss = ipcso = tucss = tucso = 0;
3578         hdr_len = ip_off + (ip->ip_hl << 2);
3579         cur = txr->next_avail_desc;
3580
3581         /* Setup of IP header checksum. */
3582         if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3583                 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3584                 offload |= CSUM_IP;
3585                 ipcss = ip_off;
3586                 ipcso = ip_off + offsetof(struct ip, ip_sum);
3587                 /*
3588                  * Start offset for header checksum calculation.
3589                  * End offset for header checksum calculation.
3590                  * Offset of place to put the checksum.
3591                  */
3592                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3593                 TXD->lower_setup.ip_fields.ipcss = ipcss;
3594                 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3595                 TXD->lower_setup.ip_fields.ipcso = ipcso;
3596                 cmd |= E1000_TXD_CMD_IP;
3597         }
3598
3599         if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3600                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3601                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3602                 offload |= CSUM_TCP;
3603                 tucss = hdr_len;
3604                 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3605                 /*
3606                  * Setting up a new checksum offload context for every
3607                  * frame takes a lot of processing time for the hardware.
3608                  * This also reduces performance a lot for small frames,
3609                  * so avoid it if the driver can reuse a previously
3610                  * configured checksum offload context.
3611                  */
3612                 if (txr->last_hw_offload == offload) {
3613                         if (offload & CSUM_IP) {
3614                                 if (txr->last_hw_ipcss == ipcss &&
3615                                     txr->last_hw_ipcso == ipcso &&
3616                                     txr->last_hw_tucss == tucss &&
3617                                     txr->last_hw_tucso == tucso)
3618                                         return;
3619                         } else {
3620                                 if (txr->last_hw_tucss == tucss &&
3621                                     txr->last_hw_tucso == tucso)
3622                                         return;
3623                         }
3624                 }
3625                 txr->last_hw_offload = offload;
3626                 txr->last_hw_tucss = tucss;
3627                 txr->last_hw_tucso = tucso;
3628                 /*
3629                  * Start offset for payload checksum calculation.
3630                  * End offset for payload checksum calculation.
3631                  * Offset of place to put the checksum.
3632                  */
3633                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3634                 TXD->upper_setup.tcp_fields.tucss = tucss;
3635                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3636                 TXD->upper_setup.tcp_fields.tucso = tucso;
3637                 cmd |= E1000_TXD_CMD_TCP;
3638         } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3639                 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3640                 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3641                 offload |= CSUM_UDP;
3642                 tucss = hdr_len;
3643                 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3643                 /*
3644                  * Setting up a new checksum offload context for every
3645                  * frame takes a lot of processing time for the hardware.
3646                  * This also reduces performance a lot for small frames,
3647                  * so avoid it if the driver can reuse a previously
3648                  * configured checksum offload context.
3649                  */
3650                 if (txr->last_hw_offload == offload) {
3651                         if (offload & CSUM_IP) {
3652                                 if (txr->last_hw_ipcss == ipcss &&
3653                                     txr->last_hw_ipcso == ipcso &&
3654                                     txr->last_hw_tucss == tucss &&
3655                                     txr->last_hw_tucso == tucso)
3656                                         return;
3657                         } else {
3658                                 if (txr->last_hw_tucss == tucss &&
3659                                     txr->last_hw_tucso == tucso)
3660                                         return;
3661                         }
3662                 }
3663                 txr->last_hw_offload = offload;
3664                 txr->last_hw_tucss = tucss;
3665                 txr->last_hw_tucso = tucso;
3666                 /*
3667                  * Start offset for header checksum calculation.
3668                  * End offset for header checksum calculation.
3669                  * Offset of place to put the checksum.
3670                  */
3671                 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3672                 TXD->upper_setup.tcp_fields.tucss = tucss;
3673                 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3674                 TXD->upper_setup.tcp_fields.tucso = tucso;
3675         }
3676   
3677         if (offload & CSUM_IP) {
3678                 txr->last_hw_ipcss = ipcss;
3679                 txr->last_hw_ipcso = ipcso;
3680         }
3681
3682         TXD->tcp_seg_setup.data = htole32(0);
3683         TXD->cmd_and_length =
3684             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3685         tx_buffer = &txr->tx_buffers[cur];
3686         tx_buffer->m_head = NULL;
3687         tx_buffer->next_eop = -1;
3688
3689         if (++cur == adapter->num_tx_desc)
3690                 cur = 0;
3691
3692         txr->tx_avail--;
3693         txr->next_avail_desc = cur;
3694 }
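/*
 * For reference (illustrative): this routine is reached only for mbufs
 * that the stack has flagged for offload, e.g. a TCP/IPv4 packet with
 *
 *      m->m_pkthdr.csum_flags = CSUM_IP | CSUM_TCP;
 *
 * and the context descriptor it builds is skipped entirely when the
 * previously programmed context (txr->last_hw_*) still matches.
 */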
3695
3696
3697 /**********************************************************************
3698  *
3699  *  Setup work for hardware segmentation offload (TSO)
3700  *
3701  **********************************************************************/
3702 static void
3703 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3704     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3705 {
3706         struct adapter                  *adapter = txr->adapter;
3707         struct e1000_context_desc       *TXD;
3708         struct em_buffer                *tx_buffer;
3709         int cur, hdr_len;
3710
3711         /*
3712          * In theory we can use the same TSO context if and only if
3713          * frame is the same type(IP/TCP) and the same MSS. However
3714          * checking whether a frame has the same IP/TCP structure is a
3715          * hard thing, so just ignore that and always establish a
3716          * new TSO context.
3717          */
3718         hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3719         *txd_lower = (E1000_TXD_CMD_DEXT |      /* Extended descr type */
3720                       E1000_TXD_DTYP_D |        /* Data descr type */
3721                       E1000_TXD_CMD_TSE);       /* Do TSE on this packet */
3722
3723         /* IP and/or TCP header checksum calculation and insertion. */
3724         *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3725
3726         cur = txr->next_avail_desc;
3727         tx_buffer = &txr->tx_buffers[cur];
3728         TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3729
3730         /*
3731          * Start offset for header checksum calculation.
3732          * End offset for header checksum calculation.
3733          * Offset of place to put the checksum.
3734          */
3735         TXD->lower_setup.ip_fields.ipcss = ip_off;
3736         TXD->lower_setup.ip_fields.ipcse =
3737             htole16(ip_off + (ip->ip_hl << 2) - 1);
3738         TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3739         /*
3740          * Start offset for payload checksum calculation.
3741          * End offset for payload checksum calculation.
3742          * Offset of place to put the checksum.
3743          */
3744         TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3745         TXD->upper_setup.tcp_fields.tucse = 0;
3746         TXD->upper_setup.tcp_fields.tucso =
3747             ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3748         /*
3749          * Payload size per packet w/o any headers.
3750          * Length of all headers up to payload.
3751          */
3752         TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3753         TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3754
3755         TXD->cmd_and_length = htole32(adapter->txd_cmd |
3756                                 E1000_TXD_CMD_DEXT |    /* Extended descr */
3757                                 E1000_TXD_CMD_TSE |     /* TSE context */
3758                                 E1000_TXD_CMD_IP |      /* Do IP csum */
3759                                 E1000_TXD_CMD_TCP |     /* Do TCP checksum */
3760                                 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3761
3762         tx_buffer->m_head = NULL;
3763         tx_buffer->next_eop = -1;
3764
3765         if (++cur == adapter->num_tx_desc)
3766                 cur = 0;
3767
3768         txr->tx_avail--;
3769         txr->next_avail_desc = cur;
3770         txr->tx_tso = TRUE;
3771 }
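/*
 * Worked example (illustrative): for an Ethernet + IPv4 + TCP frame
 * with no options, ip_off = 14, ip->ip_hl = 5 and tp->th_off = 5, so
 * hdr_len = 14 + 20 + 20 = 54; the MSS programmed into the context
 * comes directly from mp->m_pkthdr.tso_segsz as set by the stack.
 */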
3772
3773
3774 /**********************************************************************
3775  *
3776  *  Examine each tx_buffer in the used queue. If the hardware is done
3777  *  processing the packet then free associated resources. The
3778  *  tx_buffer is put back on the free queue.
3779  *
3780  **********************************************************************/
3781 static void
3782 em_txeof(struct tx_ring *txr)
3783 {
3784         struct adapter  *adapter = txr->adapter;
3785         int first, last, done, processed;
3786         struct em_buffer *tx_buffer;
3787         struct e1000_tx_desc   *tx_desc, *eop_desc;
3788         struct ifnet   *ifp = adapter->ifp;
3789
3790         EM_TX_LOCK_ASSERT(txr);
3791 #ifdef DEV_NETMAP
3792         if (ifp->if_capenable & IFCAP_NETMAP) {
3793                 struct netmap_adapter *na = NA(ifp);
3794
3795                 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3796                 EM_TX_UNLOCK(txr);
3797                 EM_CORE_LOCK(adapter);
3798                 selwakeuppri(&na->tx_si, PI_NET);
3799                 EM_CORE_UNLOCK(adapter);
3800                 EM_TX_LOCK(txr);
3801                 return;
3802         }
3803 #endif /* DEV_NETMAP */
3804
3805         /* No work, make sure watchdog is off */
3806         if (txr->tx_avail == adapter->num_tx_desc) {
3807                 txr->queue_status = EM_QUEUE_IDLE;
3808                 return;
3809         }
3810
3811         processed = 0;
3812         first = txr->next_to_clean;
3813         tx_desc = &txr->tx_base[first];
3814         tx_buffer = &txr->tx_buffers[first];
3815         last = tx_buffer->next_eop;
3816         eop_desc = &txr->tx_base[last];
3817
3818         /*
3819          * What this does is get the index of the
3820          * first descriptor AFTER the EOP of the 
3821          * first packet, that way we can do the
3822          * simple comparison on the inner while loop.
3823          */
3824         if (++last == adapter->num_tx_desc)
3825                 last = 0;
3826         done = last;
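        /*
         * Illustrative example: with first = 10 and that packet's EOP
         * at descriptor 12, done becomes 13, so the inner loop below
         * cleans descriptors 10, 11 and 12 and then stops.
         */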
3827
3828         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3829             BUS_DMASYNC_POSTREAD);
3830
3831         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3832                 /* We clean the range of the packet */
3833                 while (first != done) {
3834                         tx_desc->upper.data = 0;
3835                         tx_desc->lower.data = 0;
3836                         tx_desc->buffer_addr = 0;
3837                         ++txr->tx_avail;
3838                         ++processed;
3839
3840                         if (tx_buffer->m_head) {
3841                                 bus_dmamap_sync(txr->txtag,
3842                                     tx_buffer->map,
3843                                     BUS_DMASYNC_POSTWRITE);
3844                                 bus_dmamap_unload(txr->txtag,
3845                                     tx_buffer->map);
3846                                 m_freem(tx_buffer->m_head);
3847                                 tx_buffer->m_head = NULL;
3848                         }
3849                         tx_buffer->next_eop = -1;
3850                         txr->watchdog_time = ticks;
3851
3852                         if (++first == adapter->num_tx_desc)
3853                                 first = 0;
3854
3855                         tx_buffer = &txr->tx_buffers[first];
3856                         tx_desc = &txr->tx_base[first];
3857                 }
3858                 ++ifp->if_opackets;
3859                 /* See if we can continue to the next packet */
3860                 last = tx_buffer->next_eop;
3861                 if (last != -1) {
3862                         eop_desc = &txr->tx_base[last];
3863                         /* Get new done point */
3864                         if (++last == adapter->num_tx_desc) last = 0;
3865                         done = last;
3866                 } else
3867                         break;
3868         }
3869         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3870             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3871
3872         txr->next_to_clean = first;
3873
3874         /*
3875         ** Watchdog calculation, we know there's
3876         ** work outstanding or the first return
3877         ** would have been taken, so none processed
3878         ** for too long indicates a hang. The local timer
3879         ** will examine this and do a reset if needed.
3880         */
3881         if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3882                 txr->queue_status = EM_QUEUE_HUNG;
3883
3884         /*
3885          * If we have a minimum free, clear IFF_DRV_OACTIVE
3886          * to tell the stack that it is OK to send packets.
3887          * Notice that all writes of OACTIVE happen under the
3888          * TX lock which, with a single queue, guarantees 
3889          * sanity.
3890          */
3891         if (txr->tx_avail >= EM_MAX_SCATTER)
3892                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3893
3894         /* Disable watchdog if all clean */
3895         if (txr->tx_avail == adapter->num_tx_desc) {
3896                 txr->queue_status = EM_QUEUE_IDLE;
3897         } 
3898 }
3899
3900
3901 /*********************************************************************
3902  *
3903  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3904  *
3905  **********************************************************************/
3906 static void
3907 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3908 {
3909         struct adapter          *adapter = rxr->adapter;
3910         struct mbuf             *m;
3911         bus_dma_segment_t       segs[1];
3912         struct em_buffer        *rxbuf;
3913         int                     i, j, error, nsegs;
3914         bool                    cleaned = FALSE;
3915
3916         i = j = rxr->next_to_refresh;
3917         /*
3918         ** Get one descriptor beyond
3919         ** our work mark to control
3920         ** the loop.
3921         */
3922         if (++j == adapter->num_rx_desc)
3923                 j = 0;
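        /*
        ** Illustrative example: with num_rx_desc = 1024 and
        ** next_to_refresh = 1023, i starts at 1023 while j wraps to 0;
        ** the loop below then refreshes forward until j reaches the
        ** caller-supplied limit.
        */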
3924
3925         while (j != limit) {
3926                 rxbuf = &rxr->rx_buffers[i];
3927                 if (rxbuf->m_head == NULL) {
3928                         m = m_getjcl(M_DONTWAIT, MT_DATA,
3929                             M_PKTHDR, adapter->rx_mbuf_sz);
3930                         /*
3931                         ** If we have a temporary resource shortage
3932                         ** that causes a failure, just abort refresh
3933                         ** for now, we will return to this point when
3934                         ** reinvoked from em_rxeof.
3935                         */
3936                         if (m == NULL)
3937                                 goto update;
3938                 } else
3939                         m = rxbuf->m_head;
3940
3941                 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3942                 m->m_flags |= M_PKTHDR;
3943                 m->m_data = m->m_ext.ext_buf;
3944
3945                 /* Use bus_dma machinery to setup the memory mapping  */
3946                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3947                     m, segs, &nsegs, BUS_DMA_NOWAIT);
3948                 if (error != 0) {
3949                         printf("Refresh mbufs: hdr dmamap load"
3950                             " failure - %d\n", error);
3951                         m_free(m);
3952                         rxbuf->m_head = NULL;
3953                         goto update;
3954                 }
3955                 rxbuf->m_head = m;
3956                 bus_dmamap_sync(rxr->rxtag,
3957                     rxbuf->map, BUS_DMASYNC_PREREAD);
3958                 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3959                 cleaned = TRUE;
3960
3961                 i = j; /* Next is precalculated for us */
3962                 rxr->next_to_refresh = i;
3963                 /* Calculate next controlling index */
3964                 if (++j == adapter->num_rx_desc)
3965                         j = 0;
3966         }
3967 update:
3968         /*
3969         ** Update the tail pointer only if,
3970         ** and as far as we have refreshed.
3971         */
3972         if (cleaned)
3973                 E1000_WRITE_REG(&adapter->hw,
3974                     E1000_RDT(rxr->me), rxr->next_to_refresh);
3975
3976         return;
3977 }
3978
3979
3980 /*********************************************************************
3981  *
3982  *  Allocate memory for rx_buffer structures. Since we use one
3983  *  rx_buffer per received packet, the maximum number of rx_buffer's
3984  *  that we'll need is equal to the number of receive descriptors
3985  *  that we've allocated.
3986  *
3987  **********************************************************************/
3988 static int
3989 em_allocate_receive_buffers(struct rx_ring *rxr)
3990 {
3991         struct adapter          *adapter = rxr->adapter;
3992         device_t                dev = adapter->dev;
3993         struct em_buffer        *rxbuf;
3994         int                     error;
3995
3996         rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3997             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3998         if (rxr->rx_buffers == NULL) {
3999                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4000                 return (ENOMEM);
4001         }
4002
4003         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4004                                 1, 0,                   /* alignment, bounds */
4005                                 BUS_SPACE_MAXADDR,      /* lowaddr */
4006                                 BUS_SPACE_MAXADDR,      /* highaddr */
4007                                 NULL, NULL,             /* filter, filterarg */
4008                                 MJUM9BYTES,             /* maxsize */
4009                                 1,                      /* nsegments */
4010                                 MJUM9BYTES,             /* maxsegsize */
4011                                 0,                      /* flags */
4012                                 NULL,                   /* lockfunc */
4013                                 NULL,                   /* lockarg */
4014                                 &rxr->rxtag);
4015         if (error) {
4016                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4017                     __func__, error);
4018                 goto fail;
4019         }
4020
4021         rxbuf = rxr->rx_buffers;
4022         for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4024                 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4025                     &rxbuf->map);
4026                 if (error) {
4027                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4028                             __func__, error);
4029                         goto fail;
4030                 }
4031         }
4032
4033         return (0);
4034
4035 fail:
4036         em_free_receive_structures(adapter);
4037         return (error);
4038 }
4039
4040
4041 /*********************************************************************
4042  *
4043  *  Initialize a receive ring and its buffers.
4044  *
4045  **********************************************************************/
4046 static int
4047 em_setup_receive_ring(struct rx_ring *rxr)
4048 {
4049         struct  adapter         *adapter = rxr->adapter;
4050         struct em_buffer        *rxbuf;
4051         bus_dma_segment_t       seg[1];
4052         int                     rsize, nsegs, error = 0;
4053 #ifdef DEV_NETMAP
4054         struct netmap_adapter *na = NA(adapter->ifp);
4055         struct netmap_slot *slot;
4056 #endif
4057
4058
4059         /* Clear the ring contents */
4060         EM_RX_LOCK(rxr);
4061         rsize = roundup2(adapter->num_rx_desc *
4062             sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4063         bzero((void *)rxr->rx_base, rsize);
4064 #ifdef DEV_NETMAP
4065         slot = netmap_reset(na, NR_RX, 0, 0);
4066 #endif
4067
4068         /*
4069         ** Free current RX buffer structs and their mbufs
4070         */
4071         for (int i = 0; i < adapter->num_rx_desc; i++) {
4072                 rxbuf = &rxr->rx_buffers[i];
4073                 if (rxbuf->m_head != NULL) {
4074                         bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4075                             BUS_DMASYNC_POSTREAD);
4076                         bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4077                         m_freem(rxbuf->m_head);
4078                         rxbuf->m_head = NULL; /* mark as freed */
4079                 }
4080         }
4081
4082         /* Now replenish the mbufs */
4083         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4084                 rxbuf = &rxr->rx_buffers[j];
4085 #ifdef DEV_NETMAP
4086                 if (slot) {
4087                         int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4088                         uint64_t paddr;
4089                         void *addr;
4090
4091                         addr = PNMB(slot + si, &paddr);
4092                         netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4093                         /* Update descriptor */
4094                         rxr->rx_base[j].buffer_addr = htole64(paddr);
4095                         continue;
4096                 }
4097 #endif /* DEV_NETMAP */
4098                 rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4099                     M_PKTHDR, adapter->rx_mbuf_sz);
4100                 if (rxbuf->m_head == NULL) {
4101                         error = ENOBUFS;
4102                         goto fail;
4103                 }
4104                 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4105                 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4106                 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4107
4108                 /* Get the memory mapping */
4109                 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4110                     rxbuf->map, rxbuf->m_head, seg,
4111                     &nsegs, BUS_DMA_NOWAIT);
4112                 if (error != 0) {
4113                         m_freem(rxbuf->m_head);
4114                         rxbuf->m_head = NULL;
4115                         goto fail;
4116                 }
4117                 bus_dmamap_sync(rxr->rxtag,
4118                     rxbuf->map, BUS_DMASYNC_PREREAD);
4119
4120                 /* Update descriptor */
4121                 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4122         }
4123         rxr->next_to_check = 0;
4124         rxr->next_to_refresh = 0;
4125         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4126             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4127
4128 fail:
4129         EM_RX_UNLOCK(rxr);
4130         return (error);
4131 }
4132
4133 /*********************************************************************
4134  *
4135  *  Initialize all receive rings.
4136  *
4137  **********************************************************************/
4138 static int
4139 em_setup_receive_structures(struct adapter *adapter)
4140 {
4141         struct rx_ring *rxr = adapter->rx_rings;
4142         int q;
4143
4144         for (q = 0; q < adapter->num_queues; q++, rxr++)
4145                 if (em_setup_receive_ring(rxr))
4146                         goto fail;
4147
4148         return (0);
4149 fail:
4150         /*
4151          * Free the RX buffers allocated so far; we only handle the
4152          * rings that completed, since the failing ring has already
4153          * cleaned up after itself. Ring 'q' failed, so it is the terminus.
4154          */
4155         for (int i = 0; i < q; ++i) {
4156                 rxr = &adapter->rx_rings[i];
4157                 for (int n = 0; n < adapter->num_rx_desc; n++) {
4158                         struct em_buffer *rxbuf;
4159                         rxbuf = &rxr->rx_buffers[n];
4160                         if (rxbuf->m_head != NULL) {
4161                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4162                                   BUS_DMASYNC_POSTREAD);
4163                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4164                                 m_freem(rxbuf->m_head);
4165                                 rxbuf->m_head = NULL;
4166                         }
4167                 }
4168                 rxr->next_to_check = 0;
4169                 rxr->next_to_refresh = 0;
4170         }
4171
4172         return (ENOBUFS);
4173 }
4174
4175 /*********************************************************************
4176  *
4177  *  Free all receive rings.
4178  *
4179  **********************************************************************/
4180 static void
4181 em_free_receive_structures(struct adapter *adapter)
4182 {
4183         struct rx_ring *rxr = adapter->rx_rings;
4184
4185         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4186                 em_free_receive_buffers(rxr);
4187                 /* Free the ring memory as well */
4188                 em_dma_free(adapter, &rxr->rxdma);
4189                 EM_RX_LOCK_DESTROY(rxr);
4190         }
4191
4192         free(adapter->rx_rings, M_DEVBUF);
4193 }
4194
4195
4196 /*********************************************************************
4197  *
4198  *  Free receive ring data structures
4199  *
4200  **********************************************************************/
4201 static void
4202 em_free_receive_buffers(struct rx_ring *rxr)
4203 {
4204         struct adapter          *adapter = rxr->adapter;
4205         struct em_buffer        *rxbuf = NULL;
4206
4207         INIT_DEBUGOUT("free_receive_buffers: begin");
4208
4209         if (rxr->rx_buffers != NULL) {
4210                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4211                         rxbuf = &rxr->rx_buffers[i];
4212                         if (rxbuf->map != NULL) {
4213                                 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4214                                     BUS_DMASYNC_POSTREAD);
4215                                 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4216                                 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4217                         }
4218                         if (rxbuf->m_head != NULL) {
4219                                 m_freem(rxbuf->m_head);
4220                                 rxbuf->m_head = NULL;
4221                         }
4222                 }
4223                 free(rxr->rx_buffers, M_DEVBUF);
4224                 rxr->rx_buffers = NULL;
4225                 rxr->next_to_check = 0;
4226                 rxr->next_to_refresh = 0;
4227         }
4228
4229         if (rxr->rxtag != NULL) {
4230                 bus_dma_tag_destroy(rxr->rxtag);
4231                 rxr->rxtag = NULL;
4232         }
4233
4234         return;
4235 }
4236
4237
4238 /*********************************************************************
4239  *
4240  *  Enable receive unit.
4241  *
4242  **********************************************************************/
4243 #define MAX_INTS_PER_SEC        8000
4244 #define DEFAULT_ITR             (1000000000 / (MAX_INTS_PER_SEC * 256))
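/*
 * Worked example (illustrative arithmetic only): with MAX_INTS_PER_SEC
 * of 8000, DEFAULT_ITR = 1000000000 / (8000 * 256) = 488.  The ITR
 * register counts in 256 ns units, so programming 488 throttles the
 * device to roughly 8000 interrupts per second.
 */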
4245
4246 static void
4247 em_initialize_receive_unit(struct adapter *adapter)
4248 {
4249         struct rx_ring  *rxr = adapter->rx_rings;
4250         struct ifnet    *ifp = adapter->ifp;
4251         struct e1000_hw *hw = &adapter->hw;
4252         u64     bus_addr;
4253         u32     rctl, rxcsum;
4254
4255         INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4256
4257         /*
4258          * Make sure receives are disabled while setting
4259          * up the descriptor ring
4260          */
4261         rctl = E1000_READ_REG(hw, E1000_RCTL);
4262         /* On 82574/82583, never disable receives once enabled */
4263         if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4264                 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4265
4266         E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4267             adapter->rx_abs_int_delay.value);
4268         /*
4269          * Set the interrupt throttling rate. Value is calculated
4270          * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4271          */
4272         E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4273
4274         /*
4275         ** When using MSIX interrupts we need to throttle
4276         ** using the EITR register (82574 only)
4277         */
4278         if (hw->mac.type == e1000_82574) {
4279                 for (int i = 0; i < 4; i++)
4280                         E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4281                             DEFAULT_ITR);
4282                 /* Disable accelerated acknowledge */
4283                 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4284         }
4285
4286         if (ifp->if_capenable & IFCAP_RXCSUM) {
4287                 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4288                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4289                 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4290         }
4291
4292         /*
4293         ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4294         ** long latencies are observed, like Lenovo X60. This
4295         ** change eliminates the problem, but since having positive
4296         ** values in RDTR is a known source of problems on other
4297         ** platforms another solution is being sought.
4298         */
4299         if (hw->mac.type == e1000_82573)
4300                 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4301
4302         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4303                 /* Setup the Base and Length of the Rx Descriptor Ring */
4304                 bus_addr = rxr->rxdma.dma_paddr;
4305                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4306                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4307                 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4308                 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4309                 /* Setup the Head and Tail Descriptor Pointers */
4310                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4311 #ifdef DEV_NETMAP
4312                 /*
4313                  * an init() while a netmap client is active must
4314                  * preserve the rx buffers passed to userspace.
4315                  * In this driver it means we adjust RDT to
4316                  * something different from na->num_rx_desc - 1.
4317                  */
4318                 if (ifp->if_capenable & IFCAP_NETMAP) {
4319                         struct netmap_adapter *na = NA(adapter->ifp);
4320                         struct netmap_kring *kring = &na->rx_rings[i];
4321                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4322
4323                         E1000_WRITE_REG(hw, E1000_RDT(i), t);
4324                 } else
4325 #endif /* DEV_NETMAP */
4326                 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4327         }
4328
4329         /* Set PTHRESH for improved jumbo performance */
4330         if (((adapter->hw.mac.type == e1000_ich9lan) ||
4331             (adapter->hw.mac.type == e1000_pch2lan) ||
4332             (adapter->hw.mac.type == e1000_ich10lan)) &&
4333             (ifp->if_mtu > ETHERMTU)) {
4334                 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4335                 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4336         }
4337                 
4338         if (adapter->hw.mac.type == e1000_pch2lan) {
4339                 if (ifp->if_mtu > ETHERMTU)
4340                         e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4341                 else
4342                         e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4343         }
4344
4345         /* Setup the Receive Control Register */
4346         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4347         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4348             E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4349             (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4350
4351         /* Strip the CRC */
4352         rctl |= E1000_RCTL_SECRC;
4353
4354         /* Make sure VLAN Filters are off */
4355         rctl &= ~E1000_RCTL_VFE;
4356         rctl &= ~E1000_RCTL_SBP;
4357
4358         if (adapter->rx_mbuf_sz == MCLBYTES)
4359                 rctl |= E1000_RCTL_SZ_2048;
4360         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4361                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4362         else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4363                 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
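        /*
         * A sketch of the encoding (assumption from the 8254x RCTL layout):
         * BSEX scales the 2-bit buffer-size field by 16, so SZ_4096|BSEX
         * and SZ_8192|BSEX select the 4k and 8k buffers used for the jumbo
         * cluster cases above.
         */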
4364
4365         if (ifp->if_mtu > ETHERMTU)
4366                 rctl |= E1000_RCTL_LPE;
4367         else
4368                 rctl &= ~E1000_RCTL_LPE;
4369
4370         /* Write out the settings */
4371         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4372
4373         return;
4374 }
4375
4376
4377 /*********************************************************************
4378  *
4379  *  This routine executes in interrupt context. It replenishes
4380  *  the mbufs in the descriptor ring and sends data which has been
4381  *  dma'ed into host memory to the upper layer.
4382  *
4383  *  We loop at most count times if count is > 0, or until done if
4384  *  count < 0.
4385  *
4386  *  For polling we also return the number of cleaned packets via *done.
4387  *********************************************************************/
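/*
 * Status-bit sketch (an assumption from the legacy RX descriptor format):
 * hardware sets E1000_RXD_STAT_DD once a descriptor has been written back
 * and E1000_RXD_STAT_EOP on the last fragment of a frame, so multi-fragment
 * packets are reassembled below by chaining mbufs until EOP is seen.
 */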
4388 static bool
4389 em_rxeof(struct rx_ring *rxr, int count, int *done)
4390 {
4391         struct adapter          *adapter = rxr->adapter;
4392         struct ifnet            *ifp = adapter->ifp;
4393         struct mbuf             *mp, *sendmp;
4394         u8                      status = 0;
4395         u16                     len;
4396         int                     i, processed, rxdone = 0;
4397         bool                    eop;
4398         struct e1000_rx_desc    *cur;
4399
4400         EM_RX_LOCK(rxr);
4401
4402 #ifdef DEV_NETMAP
4403         if (ifp->if_capenable & IFCAP_NETMAP) {
4404                 struct netmap_adapter *na = NA(ifp);
4405
4406                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4407                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4408                 EM_RX_UNLOCK(rxr);
4409                 EM_CORE_LOCK(adapter);
4410                 selwakeuppri(&na->rx_si, PI_NET);
4411                 EM_CORE_UNLOCK(adapter);
4412                 return (FALSE);
4413         }
4414 #endif /* DEV_NETMAP */
4415
4416         for (i = rxr->next_to_check, processed = 0; count != 0;) {
4417
4418                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4419                         break;
4420
4421                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4422                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4423
4424                 cur = &rxr->rx_base[i];
4425                 status = cur->status;
4426                 mp = sendmp = NULL;
4427
4428                 if ((status & E1000_RXD_STAT_DD) == 0)
4429                         break;
4430
4431                 len = le16toh(cur->length);
4432                 eop = (status & E1000_RXD_STAT_EOP) != 0;
4433
4434                 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4435                     (rxr->discard == TRUE)) {
4436                         adapter->dropped_pkts++;
4437                         ++rxr->rx_discarded;
4438                         if (!eop) /* Catch subsequent segs */
4439                                 rxr->discard = TRUE;
4440                         else
4441                                 rxr->discard = FALSE;
4442                         em_rx_discard(rxr, i);
4443                         goto next_desc;
4444                 }
4445
4446                 /* Assign correct length to the current fragment */
4447                 mp = rxr->rx_buffers[i].m_head;
4448                 mp->m_len = len;
4449
4450                 /* Trigger for refresh */
4451                 rxr->rx_buffers[i].m_head = NULL;
4452
4453                 /* First segment? */
4454                 if (rxr->fmp == NULL) {
4455                         mp->m_pkthdr.len = len;
4456                         rxr->fmp = rxr->lmp = mp;
4457                 } else {
4458                         /* Chain mbuf's together */
4459                         mp->m_flags &= ~M_PKTHDR;
4460                         rxr->lmp->m_next = mp;
4461                         rxr->lmp = mp;
4462                         rxr->fmp->m_pkthdr.len += len;
4463                 }
4464
4465                 if (eop) {
4466                         --count;
4467                         sendmp = rxr->fmp;
4468                         sendmp->m_pkthdr.rcvif = ifp;
4469                         ifp->if_ipackets++;
4470                         em_receive_checksum(cur, sendmp);
4471 #ifndef __NO_STRICT_ALIGNMENT
4472                         if (adapter->max_frame_size >
4473                             (MCLBYTES - ETHER_ALIGN) &&
4474                             em_fixup_rx(rxr) != 0)
4475                                 goto skip;
4476 #endif
4477                         if (status & E1000_RXD_STAT_VP) {
4478                                 sendmp->m_pkthdr.ether_vtag =
4479                                     le16toh(cur->special);
4480                                 sendmp->m_flags |= M_VLANTAG;
4481                         }
4482 #ifndef __NO_STRICT_ALIGNMENT
4483 skip:
4484 #endif
4485                         rxr->fmp = rxr->lmp = NULL;
4486                 }
4487 next_desc:
4488                 /* Zero out the receive descriptors status. */
4489                 cur->status = 0;
4490                 ++rxdone;       /* cumulative for POLL */
4491                 ++processed;
4492
4493                 /* Advance our pointers to the next descriptor. */
4494                 if (++i == adapter->num_rx_desc)
4495                         i = 0;
4496
4497                 /* Send to the stack */
4498                 if (sendmp != NULL) {
4499                         rxr->next_to_check = i;
4500                         EM_RX_UNLOCK(rxr);
4501                         (*ifp->if_input)(ifp, sendmp);
4502                         EM_RX_LOCK(rxr);
4503                         i = rxr->next_to_check;
4504                 }
4505
4506                 /* Only refresh mbufs every 8 descriptors */
4507                 if (processed == 8) {
4508                         em_refresh_mbufs(rxr, i);
4509                         processed = 0;
4510                 }
4511         }
4512
4513         /* Catch any remaining refresh work */
4514         if (e1000_rx_unrefreshed(rxr))
4515                 em_refresh_mbufs(rxr, i);
4516
4517         rxr->next_to_check = i;
4518         if (done != NULL)
4519                 *done = rxdone;
4520         EM_RX_UNLOCK(rxr);
4521
4522         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4523 }
4524
4525 static __inline void
4526 em_rx_discard(struct rx_ring *rxr, int i)
4527 {
4528         struct em_buffer        *rbuf;
4529
4530         rbuf = &rxr->rx_buffers[i];
4531         /* Free any previous pieces */
4532         if (rxr->fmp != NULL) {
4533                 rxr->fmp->m_flags |= M_PKTHDR;
4534                 m_freem(rxr->fmp);
4535                 rxr->fmp = NULL;
4536                 rxr->lmp = NULL;
4537         }
4538         /*
4539         ** Free buffer and allow em_refresh_mbufs()
4540         ** to clean up and recharge buffer.
4541         */
4542         if (rbuf->m_head) {
4543                 m_free(rbuf->m_head);
4544                 rbuf->m_head = NULL;
4545         }
4546         return;
4547 }
4548
4549 #ifndef __NO_STRICT_ALIGNMENT
4550 /*
4551  * When jumbo frames are enabled we should realign the entire payload on
4552  * architectures with strict alignment. This is a serious design mistake
4553  * of the 8254x as it nullifies the benefit of DMA: the 8254x only allows
4554  * RX buffer sizes of 2048/4096/8192/16384, while what we really want is
4555  * 2048 - ETHER_ALIGN, to align the payload. On architectures without
4556  * strict alignment restrictions the 8254x still performs unaligned memory
4557  * accesses, which reduce performance as well. To avoid copying an entire
4558  * frame just to align it, we allocate a new mbuf, copy the ethernet
4559  * header into it, and prepend it to the existing mbuf chain.
4560  *
4561  * Be aware that best performance of the 8254x is achieved only when jumbo
4562  * frames are not used at all on architectures with strict alignment.
4563  */
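/*
 * A sketch of the two paths below: a frame that still fits in its cluster
 * is shifted forward by ETHER_HDR_LEN in place, leaving the IP payload
 * 4-byte aligned; a larger frame instead has its ethernet header peeled
 * into a freshly allocated mbuf that is prepended to the chain, avoiding
 * a full-frame copy.
 */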
4564 static int
4565 em_fixup_rx(struct rx_ring *rxr)
4566 {
4567         struct adapter *adapter = rxr->adapter;
4568         struct mbuf *m, *n;
4569         int error;
4570
4571         error = 0;
4572         m = rxr->fmp;
4573         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4574                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4575                 m->m_data += ETHER_HDR_LEN;
4576         } else {
4577                 MGETHDR(n, M_DONTWAIT, MT_DATA);
4578                 if (n != NULL) {
4579                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4580                         m->m_data += ETHER_HDR_LEN;
4581                         m->m_len -= ETHER_HDR_LEN;
4582                         n->m_len = ETHER_HDR_LEN;
4583                         M_MOVE_PKTHDR(n, m);
4584                         n->m_next = m;
4585                         rxr->fmp = n;
4586                 } else {
4587                         adapter->dropped_pkts++;
4588                         m_freem(rxr->fmp);
4589                         rxr->fmp = NULL;
4590                         error = ENOMEM;
4591                 }
4592         }
4593
4594         return (error);
4595 }
4596 #endif
4597
4598 /*********************************************************************
4599  *
4600  *  Verify that the hardware indicated that the checksum is valid.
4601  *  Inform the stack about the status of the checksum so that the
4602  *  stack doesn't spend time verifying it.
4603  *
4604  *********************************************************************/
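/*
 * Note (standard mbuf idiom, stated here as background): setting
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data = 0xffff tells the
 * stack that hardware verified the full TCP/UDP checksum, so it can skip
 * its own verification.
 */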
4605 static void
4606 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4607 {
4608         /* Ignore Checksum bit is set */
4609         if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4610                 mp->m_pkthdr.csum_flags = 0;
4611                 return;
4612         }
4613
4614         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4615                 /* Did it pass? */
4616                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4617                         /* IP Checksum Good */
4618                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4619                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4620
4621                 } else {
4622                         mp->m_pkthdr.csum_flags = 0;
4623                 }
4624         }
4625
4626         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4627                 /* Did it pass? */
4628                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4629                         mp->m_pkthdr.csum_flags |=
4630                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4631                         mp->m_pkthdr.csum_data = htons(0xffff);
4632                 }
4633         }
4634 }
4635
4636 /*
4637  * This routine is run via a vlan
4638  * config EVENT
4639  */
4640 static void
4641 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4642 {
4643         struct adapter  *adapter = ifp->if_softc;
4644         u32             index, bit;
4645
4646         if (ifp->if_softc !=  arg)   /* Not our event */
4647                 return;
4648
4649         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4650                 return;
4651
4652         EM_CORE_LOCK(adapter);
4653         index = (vtag >> 5) & 0x7F;
4654         bit = vtag & 0x1F;
4655         adapter->shadow_vfta[index] |= (1 << bit);
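        /*
         * Worked example (illustrative): for vtag 1234, index =
         * (1234 >> 5) & 0x7F = 38 and bit = 1234 & 0x1F = 18, so bit 18
         * of shadow_vfta[38] marks that VLAN in the 4096-bit filter table.
         */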
4656         ++adapter->num_vlans;
4657         /* Re-init to load the changes */
4658         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4659                 em_init_locked(adapter);
4660         EM_CORE_UNLOCK(adapter);
4661 }
4662
4663 /*
4664  * This routine is run via a vlan
4665  * unconfig EVENT
4666  */
4667 static void
4668 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4669 {
4670         struct adapter  *adapter = ifp->if_softc;
4671         u32             index, bit;
4672
4673         if (ifp->if_softc !=  arg)
4674                 return;
4675
4676         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4677                 return;
4678
4679         EM_CORE_LOCK(adapter);
4680         index = (vtag >> 5) & 0x7F;
4681         bit = vtag & 0x1F;
4682         adapter->shadow_vfta[index] &= ~(1 << bit);
4683         --adapter->num_vlans;
4684         /* Re-init to load the changes */
4685         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4686                 em_init_locked(adapter);
4687         EM_CORE_UNLOCK(adapter);
4688 }
4689
4690 static void
4691 em_setup_vlan_hw_support(struct adapter *adapter)
4692 {
4693         struct e1000_hw *hw = &adapter->hw;
4694         u32             reg;
4695
4696         /*
4697         ** We get here thru init_locked, meaning a soft
4698         ** reset, which has already cleared the VFTA and
4699         ** other state; if no VLANs have been registered,
4700         ** do nothing.
4701         */
4702         if (adapter->num_vlans == 0)
4703                 return;
4704
4705         /*
4706         ** A soft reset zeroes out the VFTA, so
4707         ** we need to repopulate it now.
4708         */
4709         for (int i = 0; i < EM_VFTA_SIZE; i++)
4710                 if (adapter->shadow_vfta[i] != 0)
4711                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4712                             i, adapter->shadow_vfta[i]);
4713
4714         reg = E1000_READ_REG(hw, E1000_CTRL);
4715         reg |= E1000_CTRL_VME;
4716         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4717
4718         /* Enable the Filter Table */
4719         reg = E1000_READ_REG(hw, E1000_RCTL);
4720         reg &= ~E1000_RCTL_CFIEN;
4721         reg |= E1000_RCTL_VFE;
4722         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4723 }
4724
4725 static void
4726 em_enable_intr(struct adapter *adapter)
4727 {
4728         struct e1000_hw *hw = &adapter->hw;
4729         u32 ims_mask = IMS_ENABLE_MASK;
4730
4731         if (hw->mac.type == e1000_82574) {
4732                 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4733                 ims_mask |= EM_MSIX_MASK;
4734         } 
4735         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4736 }
4737
4738 static void
4739 em_disable_intr(struct adapter *adapter)
4740 {
4741         struct e1000_hw *hw = &adapter->hw;
4742
4743         if (hw->mac.type == e1000_82574)
4744                 E1000_WRITE_REG(hw, EM_EIAC, 0);
4745         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4746 }
4747
4748 /*
4749  * A bit of a misnomer: what this really means is
4750  * to enable OS management of the system, i.e. to
4751  * disable the special hardware management features.
4752  */
4753 static void
4754 em_init_manageability(struct adapter *adapter)
4755 {
4756         /* A shared code workaround */
4757 #define E1000_82542_MANC2H E1000_MANC2H
4758         if (adapter->has_manage) {
4759                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4760                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4761
4762                 /* disable hardware interception of ARP */
4763                 manc &= ~(E1000_MANC_ARP_EN);
4764
4765                 /* enable receiving management packets to the host */
4766                 manc |= E1000_MANC_EN_MNG2HOST;
4767 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4768 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4769                 manc2h |= E1000_MNG2HOST_PORT_623;
4770                 manc2h |= E1000_MNG2HOST_PORT_664;
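                /*
                 * Note (a sketch of why these ports): UDP 623 and 664 are
                 * the conventional RMCP and secure-RMCP management ports,
                 * so setting their MANC2H bits forwards such packets to
                 * the host as well as to the management controller.
                 */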
4771                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4772                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4773         }
4774 }
4775
4776 /*
4777  * Give control back to hardware management
4778  * controller if there is one.
4779  */
4780 static void
4781 em_release_manageability(struct adapter *adapter)
4782 {
4783         if (adapter->has_manage) {
4784                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4785
4786                 /* re-enable hardware interception of ARP */
4787                 manc |= E1000_MANC_ARP_EN;
4788                 manc &= ~E1000_MANC_EN_MNG2HOST;
4789
4790                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4791         }
4792 }
4793
4794 /*
4795  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4796  * For ASF and Pass Through versions of f/w this means
4797  * that the driver is loaded. For AMT version type f/w
4798  * this means that the network i/f is open.
4799  */
4800 static void
4801 em_get_hw_control(struct adapter *adapter)
4802 {
4803         u32 ctrl_ext, swsm;
4804
4805         if (adapter->hw.mac.type == e1000_82573) {
4806                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4807                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4808                     swsm | E1000_SWSM_DRV_LOAD);
4809                 return;
4810         }
4811         /* else */
4812         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4813         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4814             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4815         return;
4816 }
4817
4818 /*
4819  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4820  * For ASF and Pass Through versions of f/w this means that
4821  * the driver is no longer loaded. For AMT versions of the
4822  * f/w this means that the network i/f is closed.
4823  */
4824 static void
4825 em_release_hw_control(struct adapter *adapter)
4826 {
4827         u32 ctrl_ext, swsm;
4828
4829         if (!adapter->has_manage)
4830                 return;
4831
4832         if (adapter->hw.mac.type == e1000_82573) {
4833                 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4834                 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4835                     swsm & ~E1000_SWSM_DRV_LOAD);
4836                 return;
4837         }
4838         /* else */
4839         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4840         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4841             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4842         return;
4843 }
4844
4845 static int
4846 em_is_valid_ether_addr(u8 *addr)
4847 {
4848         char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
4849
4850         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4851                 return (FALSE);
4852         }
4853
4854         return (TRUE);
4855 }
4856
4857 /*
4858 ** Parse the interface capabilities with regard
4859 ** to both system management and wake-on-lan for
4860 ** later use.
4861 */
4862 static void
4863 em_get_wakeup(device_t dev)
4864 {
4865         struct adapter  *adapter = device_get_softc(dev);
4866         u16             eeprom_data = 0, device_id, apme_mask;
4867
4868         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4869         apme_mask = EM_EEPROM_APME;
4870
4871         switch (adapter->hw.mac.type) {
4872         case e1000_82573:
4873         case e1000_82583:
4874                 adapter->has_amt = TRUE;
4875                 /* Falls thru */
4876         case e1000_82571:
4877         case e1000_82572:
4878         case e1000_80003es2lan:
4879                 if (adapter->hw.bus.func == 1) {
4880                         e1000_read_nvm(&adapter->hw,
4881                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4882                         break;
4883                 } else
4884                         e1000_read_nvm(&adapter->hw,
4885                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4886                 break;
4887         case e1000_ich8lan:
4888         case e1000_ich9lan:
4889         case e1000_ich10lan:
4890         case e1000_pchlan:
4891         case e1000_pch2lan:
4892                 apme_mask = E1000_WUC_APME;
4893                 adapter->has_amt = TRUE;
4894                 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4895                 break;
4896         default:
4897                 e1000_read_nvm(&adapter->hw,
4898                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4899                 break;
4900         }
4901         if (eeprom_data & apme_mask)
4902                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4903         /*
4904          * We have the eeprom settings, now apply the special cases
4905          * where the eeprom may be wrong or the board won't support
4906          * wake on lan on a particular port
4907          */
4908         device_id = pci_get_device(dev);
4909         switch (device_id) {
4910         case E1000_DEV_ID_82571EB_FIBER:
4911                 /* Wake events only supported on port A for dual fiber
4912                  * regardless of eeprom setting */
4913                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4914                     E1000_STATUS_FUNC_1)
4915                         adapter->wol = 0;
4916                 break;
4917         case E1000_DEV_ID_82571EB_QUAD_COPPER:
4918         case E1000_DEV_ID_82571EB_QUAD_FIBER:
4919         case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4920                 /* if quad port adapter, disable WoL on all but port A */
4921                 if (global_quad_port_a != 0)
4922                         adapter->wol = 0;
4923                 /* Reset for multiple quad port adapters */
4924                 if (++global_quad_port_a == 4)
4925                         global_quad_port_a = 0;
4926                 break;
4927         }
4928         return;
4929 }
4930
4931
4932 /*
4933  * Enable PCI Wake On Lan capability
4934  */
4935 static void
4936 em_enable_wakeup(device_t dev)
4937 {
4938         struct adapter  *adapter = device_get_softc(dev);
4939         struct ifnet    *ifp = adapter->ifp;
4940         u32             pmc, ctrl, ctrl_ext, rctl;
4941         u16             status;
4942
4943         if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
4944                 return;
4945
4946         /* Advertise the wakeup capability */
4947         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4948         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4949         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4950         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4951
4952         if ((adapter->hw.mac.type == e1000_ich8lan) ||
4953             (adapter->hw.mac.type == e1000_pchlan) ||
4954             (adapter->hw.mac.type == e1000_ich9lan) ||
4955             (adapter->hw.mac.type == e1000_ich10lan))
4956                 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4957
4958         /* Keep the laser running on Fiber adapters */
4959         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4960             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4961                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4962                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4963                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4964         }
4965
4966         /*
4967         ** Determine type of Wakeup: note that wol
4968         ** is set with all bits on by default.
4969         */
4970         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4971                 adapter->wol &= ~E1000_WUFC_MAG;
4972
4973         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4974                 adapter->wol &= ~E1000_WUFC_MC;
4975         else {
4976                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4977                 rctl |= E1000_RCTL_MPE;
4978                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4979         }
4980
4981         if ((adapter->hw.mac.type == e1000_pchlan) ||
4982             (adapter->hw.mac.type == e1000_pch2lan)) {
4983                 if (em_enable_phy_wakeup(adapter))
4984                         return;
4985         } else {
4986                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4987                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4988         }
4989
4990         if (adapter->hw.phy.type == e1000_phy_igp_3)
4991                 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4992
4993         /* Request PME */
4994         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4995         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4996         if (ifp->if_capenable & IFCAP_WOL)
4997                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4998         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4999
5000         return;
5001 }
5002
5003 /*
5004 ** WOL in the newer chipset interfaces (pchlan)
5005 ** requires settings to be copied into the PHY
5006 */
5007 static int
5008 em_enable_phy_wakeup(struct adapter *adapter)
5009 {
5010         struct e1000_hw *hw = &adapter->hw;
5011         u32 mreg, ret = 0;
5012         u16 preg;
5013
5014         /* copy MAC RARs to PHY RARs */
5015         e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5016
5017         /* copy MAC MTA to PHY MTA */
5018         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5019                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5020                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5021                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5022                     (u16)((mreg >> 16) & 0xFFFF));
5023         }
5024
5025         /* configure PHY Rx Control register */
5026         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5027         mreg = E1000_READ_REG(hw, E1000_RCTL);
5028         if (mreg & E1000_RCTL_UPE)
5029                 preg |= BM_RCTL_UPE;
5030         if (mreg & E1000_RCTL_MPE)
5031                 preg |= BM_RCTL_MPE;
5032         preg &= ~(BM_RCTL_MO_MASK);
5033         if (mreg & E1000_RCTL_MO_3)
5034                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5035                                 << BM_RCTL_MO_SHIFT);
5036         if (mreg & E1000_RCTL_BAM)
5037                 preg |= BM_RCTL_BAM;
5038         if (mreg & E1000_RCTL_PMCF)
5039                 preg |= BM_RCTL_PMCF;
5040         mreg = E1000_READ_REG(hw, E1000_CTRL);
5041         if (mreg & E1000_CTRL_RFCE)
5042                 preg |= BM_RCTL_RFCE;
5043         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5044
5045         /* enable PHY wakeup in MAC register */
5046         E1000_WRITE_REG(hw, E1000_WUC,
5047             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5048         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5049
5050         /* configure and enable PHY wakeup in PHY registers */
5051         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5052         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5053
5054         /* activate PHY wakeup */
5055         ret = hw->phy.ops.acquire(hw);
5056         if (ret) {
5057                 printf("Could not acquire PHY\n");
5058                 return (ret);
5059         }
5060         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5061                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
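        /*
         * Note (sketch): the BM wakeup-control bits live on PHY page 769
         * (BM_WUC_ENABLE_PAGE); the raw MDIC page-select write above
         * switches pages before BM_WUC_ENABLE_REG is read-modify-written.
         */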
5062         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5063         if (ret) {
5064                 printf("Could not read PHY page 769\n");
5065                 goto out;
5066         }
5067         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5068         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5069         if (ret)
5070                 printf("Could not set PHY Host Wakeup bit\n");
5071 out:
5072         hw->phy.ops.release(hw);
5073
5074         return (ret);
5075 }
5076
5077 static void
5078 em_led_func(void *arg, int onoff)
5079 {
5080         struct adapter  *adapter = arg;
5081  
5082         EM_CORE_LOCK(adapter);
5083         if (onoff) {
5084                 e1000_setup_led(&adapter->hw);
5085                 e1000_led_on(&adapter->hw);
5086         } else {
5087                 e1000_led_off(&adapter->hw);
5088                 e1000_cleanup_led(&adapter->hw);
5089         }
5090         EM_CORE_UNLOCK(adapter);
5091 }
5092
5093 /*
5094 ** Disable the L0S and L1 LINK states
5095 */
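/*
 * Sketch of the mechanism (assumption from the PCIe spec layout): the ASPM
 * Control field occupies the low bits of the Link Control register, so
 * clearing PCIEM_LINK_CTL_ASPMC below disables both L0s and L1 entry on
 * the affected parts.
 */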
5096 static void
5097 em_disable_aspm(struct adapter *adapter)
5098 {
5099         int             base, reg;
5100         u16             link_cap, link_ctrl;
5101         device_t        dev = adapter->dev;
5102
5103         switch (adapter->hw.mac.type) {
5104                 case e1000_82573:
5105                 case e1000_82574:
5106                 case e1000_82583:
5107                         break;
5108                 default:
5109                         return;
5110         }
5111         if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5112                 return;
5113         reg = base + PCIER_LINK_CAP;
5114         link_cap = pci_read_config(dev, reg, 2);
5115         if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5116                 return;
5117         reg = base + PCIER_LINK_CTL;
5118         link_ctrl = pci_read_config(dev, reg, 2);
5119         link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5120         pci_write_config(dev, reg, link_ctrl, 2);
5121         return;
5122 }
5123
5124 /**********************************************************************
5125  *
5126  *  Update the board statistics counters.
5127  *
5128  **********************************************************************/
5129 static void
5130 em_update_stats_counters(struct adapter *adapter)
5131 {
5132         struct ifnet   *ifp;
5133
5134         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5135            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5136                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5137                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5138         }
5139         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5140         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5141         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5142         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5143
5144         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5145         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5146         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5147         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5148         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5149         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5150         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5151         /*
5152         ** For watchdog management we need to know if we have been
5153         ** paused during the last interval, so capture that here.
5154         */
5155         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5156         adapter->stats.xoffrxc += adapter->pause_frames;
5157         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5158         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5159         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5160         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5161         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5162         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5163         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5164         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5165         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5166         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5167         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5168         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5169
5170         /* For the 64-bit byte counters the low dword must be read first. */
5171         /* Both registers clear on the read of the high dword */
5172
5173         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5174             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5175         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5176             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5177
5178         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5179         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5180         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5181         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5182         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5183
5184         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5185         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
                 ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5186
5187         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5188         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5189         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5190         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5191         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5192         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5193         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5194         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5195         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5196         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5197
5198         /* Interrupt Counts */
5199
5200         adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5201         adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5202         adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5203         adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5204         adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5205         adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5206         adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5207         adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5208         adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5209
5210         if (adapter->hw.mac.type >= e1000_82543) {
5211                 adapter->stats.algnerrc +=
5212                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5213                 adapter->stats.rxerrc +=
5214                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5215                 adapter->stats.tncrs +=
5216                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5217                 adapter->stats.cexterr +=
5218                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5219                 adapter->stats.tsctc +=
5220                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5221                 adapter->stats.tsctfc +=
5222                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5223         }
5224         ifp = adapter->ifp;
5225
5226         ifp->if_collisions = adapter->stats.colc;
5227
5228         /* Rx Errors */
5229         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5230             adapter->stats.crcerrs + adapter->stats.algnerrc +
5231             adapter->stats.ruc + adapter->stats.roc +
5232             adapter->stats.mpc + adapter->stats.cexterr;
5233
5234         /* Tx Errors */
5235         ifp->if_oerrors = adapter->stats.ecol +
5236             adapter->stats.latecol + adapter->watchdog_events;
5237 }
5238
5239 /* Export a single 32-bit register via a read-only sysctl. */
5240 static int
5241 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5242 {
5243         struct adapter *adapter;
5244         u_int val;
5245
5246         adapter = oidp->oid_arg1;
5247         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5248         return (sysctl_handle_int(oidp, &val, 0, req));
5249 }
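/*
 * Usage sketch (node name is a hypothetical example): each SYSCTL_ADD_PROC()
 * below passes the adapter as arg1 and a register offset such as E1000_CTRL
 * as arg2, so reading the sysctl performs a live register read, e.g.:
 *
 *   sysctl dev.em.0.device_control
 */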
5250
5251 /*
5252  * Add sysctl variables, one per statistic, to the system.
5253  */
5254 static void
5255 em_add_hw_stats(struct adapter *adapter)
5256 {
5257         device_t dev = adapter->dev;
5258
5259         struct tx_ring *txr = adapter->tx_rings;
5260         struct rx_ring *rxr = adapter->rx_rings;
5261
5262         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5263         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5264         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5265         struct e1000_hw_stats *stats = &adapter->stats;
5266
5267         struct sysctl_oid *stat_node, *queue_node, *int_node;
5268         struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5269
5270 #define QUEUE_NAME_LEN 32
5271         char namebuf[QUEUE_NAME_LEN];
5272         
5273         /* Driver Statistics */
5274         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5275                         CTLFLAG_RD, &adapter->link_irq,
5276                         "Link MSIX IRQ Handled");
5277         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
5278                          CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5279                          "Std mbuf failed");
5280         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
5281                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5282                          "Std mbuf cluster failed");
5283         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5284                         CTLFLAG_RD, &adapter->dropped_pkts,
5285                         "Driver dropped packets");
5286         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5287                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5288                         "Driver tx dma failure in xmit");
5289         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5290                         CTLFLAG_RD, &adapter->rx_overruns,
5291                         "RX overruns");
5292         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5293                         CTLFLAG_RD, &adapter->watchdog_events,
5294                         "Watchdog timeouts");
5295         
5296         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5297                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5298                         em_sysctl_reg_handler, "IU",
5299                         "Device Control Register");
5300         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5301                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5302                         em_sysctl_reg_handler, "IU",
5303                         "Receiver Control Register");
5304         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5305                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5306                         "Flow Control High Watermark");
5307         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5308                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5309                         "Flow Control Low Watermark");
5310
5311         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5312                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5313                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5314                                             CTLFLAG_RD, NULL, "Queue Name");
5315                 queue_list = SYSCTL_CHILDREN(queue_node);
5316
5317                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5318                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5319                                 E1000_TDH(txr->me),
5320                                 em_sysctl_reg_handler, "IU",
5321                                 "Transmit Descriptor Head");
5322                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5323                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5324                                 E1000_TDT(txr->me),
5325                                 em_sysctl_reg_handler, "IU",
5326                                 "Transmit Descriptor Tail");
5327                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5328                                 CTLFLAG_RD, &txr->tx_irq,
5329                                 "Queue MSI-X Transmit Interrupts");
5330                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5331                                 CTLFLAG_RD, &txr->no_desc_avail,
5332                                 "Queue No Descriptor Available");
5333                 
5334                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5335                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5336                                 E1000_RDH(rxr->me),
5337                                 em_sysctl_reg_handler, "IU",
5338                                 "Receive Descriptor Head");
5339                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5340                                 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5341                                 E1000_RDT(rxr->me),
5342                                 em_sysctl_reg_handler, "IU",
5343                                 "Receive Descriptor Tail");
5344                 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5345                                 CTLFLAG_RD, &rxr->rx_irq,
5346                                 "Queue MSI-X Receive Interrupts");
5347         }
5348
5349         /* MAC stats get their own sub node */
5350
5351         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5352                                     CTLFLAG_RD, NULL, "Statistics");
5353         stat_list = SYSCTL_CHILDREN(stat_node);
5354
5355         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5356                         CTLFLAG_RD, &stats->ecol,
5357                         "Excessive collisions");
5358         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5359                         CTLFLAG_RD, &stats->scc,
5360                         "Single collisions");
5361         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5362                         CTLFLAG_RD, &stats->mcc,
5363                         "Multiple collisions");
5364         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5365                         CTLFLAG_RD, &stats->latecol,
5366                         "Late collisions");
5367         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5368                         CTLFLAG_RD, &stats->colc,
5369                         "Collision Count");
5370         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5371                         CTLFLAG_RD, &adapter->stats.symerrs,
5372                         "Symbol Errors");
5373         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5374                         CTLFLAG_RD, &adapter->stats.sec,
5375                         "Sequence Errors");
5376         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5377                         CTLFLAG_RD, &adapter->stats.dc,
5378                         "Defer Count");
5379         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5380                         CTLFLAG_RD, &adapter->stats.mpc,
5381                         "Missed Packets");
5382         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5383                         CTLFLAG_RD, &adapter->stats.rnbc,
5384                         "Receive No Buffers");
5385         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5386                         CTLFLAG_RD, &adapter->stats.ruc,
5387                         "Receive Undersize");
5388         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5389                         CTLFLAG_RD, &adapter->stats.rfc,
5390                         "Fragmented Packets Received");
5391         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5392                         CTLFLAG_RD, &adapter->stats.roc,
5393                         "Oversized Packets Received");
5394         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5395                         CTLFLAG_RD, &adapter->stats.rjc,
5396                         "Received Jabber");
5397         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5398                         CTLFLAG_RD, &adapter->stats.rxerrc,
5399                         "Receive Errors");
5400         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5401                         CTLFLAG_RD, &adapter->stats.crcerrs,
5402                         "CRC errors");
5403         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5404                         CTLFLAG_RD, &adapter->stats.algnerrc,
5405                         "Alignment Errors");
5406         /* On 82575 these are collision counts */
5407         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5408                         CTLFLAG_RD, &adapter->stats.cexterr,
5409                         "Collision/Carrier extension errors");
5410         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5411                         CTLFLAG_RD, &adapter->stats.xonrxc,
5412                         "XON Received");
5413         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5414                         CTLFLAG_RD, &adapter->stats.xontxc,
5415                         "XON Transmitted");
5416         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5417                         CTLFLAG_RD, &adapter->stats.xoffrxc,
5418                         "XOFF Received");
5419         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5420                         CTLFLAG_RD, &adapter->stats.xofftxc,
5421                         "XOFF Transmitted");
5422
5423         /* Packet Reception Stats */
5424         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5425                         CTLFLAG_RD, &adapter->stats.tpr,
5426                         "Total Packets Received");
5427         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5428                         CTLFLAG_RD, &adapter->stats.gprc,
5429                         "Good Packets Received");
5430         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5431                         CTLFLAG_RD, &adapter->stats.bprc,
5432                         "Broadcast Packets Received");
5433         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5434                         CTLFLAG_RD, &adapter->stats.mprc,
5435                         "Multicast Packets Received");
5436         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5437                         CTLFLAG_RD, &adapter->stats.prc64,
5438                         "64 byte frames received");
5439         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5440                         CTLFLAG_RD, &adapter->stats.prc127,
5441                         "65-127 byte frames received");
5442         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5443                         CTLFLAG_RD, &adapter->stats.prc255,
5444                         "128-255 byte frames received");
5445         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5446                         CTLFLAG_RD, &adapter->stats.prc511,
5447                         "256-511 byte frames received");
5448         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5449                         CTLFLAG_RD, &adapter->stats.prc1023,
5450                         "512-1023 byte frames received");
5451         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5452                         CTLFLAG_RD, &adapter->stats.prc1522,
5453                         "1024-1522 byte frames received");
5454         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5455                         CTLFLAG_RD, &adapter->stats.gorc, 
5456                         "Good Octets Received"); 
5457
5458         /* Packet Transmission Stats */
5459         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5460                         CTLFLAG_RD, &adapter->stats.gotc, 
5461                         "Good Octets Transmitted"); 
5462         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5463                         CTLFLAG_RD, &adapter->stats.tpt,
5464                         "Total Packets Transmitted");
5465         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5466                         CTLFLAG_RD, &adapter->stats.gptc,
5467                         "Good Packets Transmitted");
5468         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5469                         CTLFLAG_RD, &adapter->stats.bptc,
5470                         "Broadcast Packets Transmitted");
5471         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5472                         CTLFLAG_RD, &adapter->stats.mptc,
5473                         "Multicast Packets Transmitted");
5474         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5475                         CTLFLAG_RD, &adapter->stats.ptc64,
5476                         "64 byte frames transmitted");
5477         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5478                         CTLFLAG_RD, &adapter->stats.ptc127,
5479                         "65-127 byte frames transmitted");
5480         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5481                         CTLFLAG_RD, &adapter->stats.ptc255,
5482                         "128-255 byte frames transmitted");
5483         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5484                         CTLFLAG_RD, &adapter->stats.ptc511,
5485                         "256-511 byte frames transmitted");
5486         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5487                         CTLFLAG_RD, &adapter->stats.ptc1023,
5488                         "512-1023 byte frames transmitted");
5489         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5490                         CTLFLAG_RD, &adapter->stats.ptc1522,
5491                         "1024-1522 byte frames transmitted");
5492         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5493                         CTLFLAG_RD, &adapter->stats.tsctc,
5494                         "TSO Contexts Transmitted");
5495         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5496                         CTLFLAG_RD, &adapter->stats.tsctfc,
5497                         "TSO Contexts Failed");
5498
5499
5500         /* Interrupt Stats */
5501
5502         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5503                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5504         int_list = SYSCTL_CHILDREN(int_node);
5505
5506         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5507                         CTLFLAG_RD, &adapter->stats.iac,
5508                         "Interrupt Assertion Count");
5509
5510         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5511                         CTLFLAG_RD, &adapter->stats.icrxptc,
5512                         "Interrupt Cause Rx Pkt Timer Expire Count");
5513
5514         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5515                         CTLFLAG_RD, &adapter->stats.icrxatc,
5516                         "Interrupt Cause Rx Abs Timer Expire Count");
5517
5518         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5519                         CTLFLAG_RD, &adapter->stats.ictxptc,
5520                         "Interrupt Cause Tx Pkt Timer Expire Count");
5521
5522         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5523                         CTLFLAG_RD, &adapter->stats.ictxatc,
5524                         "Interrupt Cause Tx Abs Timer Expire Count");
5525
5526         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5527                         CTLFLAG_RD, &adapter->stats.ictxqec,
5528                         "Interrupt Cause Tx Queue Empty Count");
5529
5530         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5531                         CTLFLAG_RD, &adapter->stats.ictxqmtc,
5532                         "Interrupt Cause Tx Queue Min Thresh Count");
5533
5534         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5535                         CTLFLAG_RD, &adapter->stats.icrxdmtc,
5536                         "Interrupt Cause Rx Desc Min Thresh Count");
5537
5538         SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5539                         CTLFLAG_RD, &adapter->stats.icrxoc,
5540                         "Interrupt Cause Receiver Overrun Count");
5541 }
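
/*
 * Illustrative readout (not part of the source): once attach completes,
 * the "mac_stats" and "interrupts" nodes registered above hang off the
 * device's sysctl tree.  The unit number below is an assumption for a
 * first adapter:
 *
 *      sysctl dev.em.0.mac_stats
 *      sysctl dev.em.0.interrupts.asserts
 */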
5542
5543 /**********************************************************************
5544  *
5545  *  This routine provides a way to dump out the adapter EEPROM,
5546  *  often a useful debug/service tool. Only the first 32 words
5547  *  are dumped; the data that matters lies within that range.
5548  *
5549  **********************************************************************/
5550 static int
5551 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5552 {
5553         struct adapter *adapter = (struct adapter *)arg1;
5554         int error;
5555         int result;
5556
5557         result = -1;
5558         error = sysctl_handle_int(oidp, &result, 0, req);
5559
5560         if (error || !req->newptr)
5561                 return (error);
5562
5563         /*
5564          * This value will cause a hex dump of the
5565          * first 32 16-bit words of the EEPROM to
5566          * the screen.
5567          */
5568         if (result == 1)
5569                 em_print_nvm_info(adapter);
5570
5571         return (error);
5572 }
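
/*
 * Usage sketch, assuming this handler is registered at attach time under
 * an oid named "nvm" (the oid name and unit number are assumptions for
 * illustration):
 *
 *      sysctl dev.em.0.nvm=1
 */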
5573
5574 static void
5575 em_print_nvm_info(struct adapter *adapter)
5576 {
5577         u16     eeprom_data;
5578         int     i, j, row = 0;
5579
5580         /* It's a bit crude, but it gets the job done. */
5581         printf("\nInterface EEPROM Dump:\n");
5582         printf("Offset\n0x0000  ");
5583         for (i = 0, j = 0; i < 32; i++, j++) {
5584                 if (j == 8) { /* Make the offset block */
5585                         j = 0; ++row;
5586                         printf("\n0x00%x0  ", row);
5587                 }
5588                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5589                 printf("%04x ", eeprom_data);
5590         }
5591         printf("\n");
5592 }
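
/*
 * The loop above produces output shaped like the following, eight
 * 16-bit words per row (the word values here are illustrative only):
 *
 *      Interface EEPROM Dump:
 *      Offset
 *      0x0000  8086 10d3 0000 0000 0000 0000 0000 0000
 *      0x0010  ...
 */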
5593
5594 static int
5595 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5596 {
5597         struct em_int_delay_info *info;
5598         struct adapter *adapter;
5599         u32 regval;
5600         int error, usecs, ticks;
5601
5602         info = (struct em_int_delay_info *)arg1;
5603         usecs = info->value;
5604         error = sysctl_handle_int(oidp, &usecs, 0, req);
5605         if (error != 0 || req->newptr == NULL)
5606                 return (error);
5607         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5608                 return (EINVAL);
5609         info->value = usecs;
5610         ticks = EM_USECS_TO_TICKS(usecs);
5611
5612         adapter = info->adapter;
5613         
5614         EM_CORE_LOCK(adapter);
5615         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5616         regval = (regval & ~0xffff) | (ticks & 0xffff);
5617         /* Handle a few special cases. */
5618         switch (info->offset) {
5619         case E1000_RDTR:
5620                 break;
5621         case E1000_TIDV:
5622                 if (ticks == 0) {
5623                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5624                         /* Don't write 0 into the TIDV register. */
5625                         regval++;
5626                 } else
5627                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5628                 break;
5629         }
5630         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5631         EM_CORE_UNLOCK(adapter);
5632         return (0);
5633 }
5634
5635 static void
5636 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5637         const char *description, struct em_int_delay_info *info,
5638         int offset, int value)
5639 {
5640         info->adapter = adapter;
5641         info->offset = offset;
5642         info->value = value;
5643         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5644             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5645             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5646             info, 0, em_sysctl_int_delay, "I", description);
5647 }
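
/*
 * A minimal registration sketch, mirroring how the attach path is
 * expected to use this helper; the oid name, softc member, and default
 * shown are assumptions for illustration:
 *
 *      em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *          "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *          E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */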
5648
5649 static void
5650 em_set_sysctl_value(struct adapter *adapter, const char *name,
5651         const char *description, int *limit, int value)
5652 {
5653         *limit = value;
5654         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5655             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5656             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5657 }
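
/*
 * Usage sketch (names are assumptions for illustration): publishes a
 * plain read/write integer tunable and records its default in the softc:
 *
 *      em_set_sysctl_value(adapter, "rx_processing_limit",
 *          "max number of rx packets to process",
 *          &adapter->rx_process_limit, em_rx_process_limit);
 */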
5658
5659
5660 /*
5661 ** Set flow control using sysctl:
5662 ** Flow control values:
5663 **      0 - off
5664 **      1 - rx pause
5665 **      2 - tx pause
5666 **      3 - full
5667 */
5668 static int
5669 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5670 {       
5671         int             error, input;
5672         struct adapter  *adapter = (struct adapter *) arg1;
5673
5674         input = adapter->fc;    /* seed with the current setting; a
5675                                    static local is shared by all units */
5676         error = sysctl_handle_int(oidp, &input, 0, req);
5677
5678         if ((error) || (req->newptr == NULL))
5679                 return (error);
5680         if (input == adapter->fc) /* no change? */
5681                 return (error);
5682
5683         switch (input) {
5684                 case e1000_fc_rx_pause:
5685                 case e1000_fc_tx_pause:
5686                 case e1000_fc_full:
5687                 case e1000_fc_none:
5688                         adapter->hw.fc.requested_mode = input;
5689                         adapter->fc = input;
5690                         break;
5691                 default:
5692                         /* Do nothing */
5693                         return (error);
5694         }
5695
5696         adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5697         e1000_force_mac_fc(&adapter->hw);
5698         return (error);
5699 }
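
/*
 * Illustrative use from userland, assuming the handler is registered
 * under an oid named "flow_control" (an assumption; check the attach
 * code).  Writing 3 requests full flow control, per the table above:
 *
 *      sysctl dev.em.0.flow_control=3
 */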
5700
5701 /*
5702 ** Manage Energy Efficient Ethernet:
5703 ** Control values:
5704 **     0 - enabled, 1 - disabled
5705 */
5706 static int
5707 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5708 {
5709         struct adapter *adapter = (struct adapter *) arg1;
5710         int             error, value;
5711
5712         value = adapter->hw.dev_spec.ich8lan.eee_disable;
5713         error = sysctl_handle_int(oidp, &value, 0, req);
5714         if (error || req->newptr == NULL)
5715                 return (error);
5716         EM_CORE_LOCK(adapter);
5717         adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5718         em_init_locked(adapter);
5719         EM_CORE_UNLOCK(adapter);
5720         return (0);
5721 }
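
/*
 * Illustrative use, assuming registration under an oid such as
 * "eee_control" (an assumption); a non-zero write sets eee_disable
 * and re-initializes the interface:
 *
 *      sysctl dev.em.0.eee_control=1
 */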
5722
5723 static int
5724 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5725 {
5726         struct adapter *adapter;
5727         int error;
5728         int result;
5729
5730         result = -1;
5731         error = sysctl_handle_int(oidp, &result, 0, req);
5732
5733         if (error || !req->newptr)
5734                 return (error);
5735
5736         if (result == 1) {
5737                 adapter = (struct adapter *)arg1;
5738                 em_print_debug_info(adapter);
5739         }
5740
5741         return (error);
5742 }
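
/*
 * Illustrative trigger, assuming registration under an oid named
 * "debug" (an assumption); writing 1 dumps the state printed by
 * em_print_debug_info():
 *
 *      sysctl dev.em.0.debug=1
 */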
5743
5744 /*
5745 ** This routine is meant to be fluid, add whatever is
5746 ** needed for debugging a problem.  -jfv
5747 */
5748 static void
5749 em_print_debug_info(struct adapter *adapter)
5750 {
5751         device_t dev = adapter->dev;
5752         struct tx_ring *txr = adapter->tx_rings;
5753         struct rx_ring *rxr = adapter->rx_rings;
5754
5755         if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5756                 printf("Interface is RUNNING ");
5757         else
5758                 printf("Interface is NOT RUNNING ");
5759
5760         if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5761                 printf("and INACTIVE\n");
5762         else
5763                 printf("and ACTIVE\n");
5764
5765         device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5766             E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5767             E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5768         device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5769             E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5770             E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5771         device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5772         device_printf(dev, "TX descriptors avail = %d\n",
5773             txr->tx_avail);
5774         device_printf(dev, "TX descriptor avail failures = %lu\n",
5775             txr->no_desc_avail);
5776         device_printf(dev, "RX discarded packets = %lu\n",
5777             rxr->rx_discarded);
5778         device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5779         device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5780 }